Naïvest approach.

2017-05-09 23:51:23 +02:00 · 2017-05-09 23:51:23 +02:00 · 053d1f84e2
commit 053d1f84e2
parent 063d6d3836
2 changed files with 221 additions and 0 deletions
--- a/absorption-test.py
+++ b/absorption-test.py
@ -0,0 +1,63 @@
 #!/usr/bin/env python
 import json
 import collections
 # Hand-labelled sentences: each entry pairs the raw text with its parsed
 # form, a tuple holding the ingestion method name followed by its arguments.
 examples = [
    {"text": text, "parsed": parsed}
    for text, parsed in (
        ("icecream is cold",
         ("exists-property-with-value", "icecream", "cold")),
        ("earth is a planet",
         ("pertenence-to-group", "earth", "planet")),
        ("Green is a color",
         ("pertenence-to-group", "green", "color")),
        ("airplanes do fly",
         ("has-capacity", "airplane", "fly")),
    )
 ]
 # Nested subject -> property -> value store. Missing subjects and missing
 # properties are created on first access; the innermost mapping has no
 # default factory of its own.
 knowledge_base = collections.defaultdict(
    lambda: collections.defaultdict(collections.defaultdict)
 )
 def property_for_value(value):
    """Return the name of the property that `value` is a value of.

    Only "cold" is known so far and maps to "temperature"; any other
    value yields None.
    """
    # The previous test was `if "cold":`, which is always true because a
    # non-empty string literal is truthy; compare against the argument.
    if value == "cold":
        return "temperature"
    return None
 def exists_property_with_value(subj, value):
    """Store `value` for `subj` under the property named by property_for_value(value)."""
    subject_facts = knowledge_base[subj]
    subject_facts[property_for_value(value)] = value
 def pertenence_to_group(subj, group):
    """Record `group` as the "group" of `subj`, replacing any previous one."""
    subject_facts = knowledge_base[subj]
    subject_facts["group"] = group
 def has_capacity(subj, capacity):
    """Append `capacity` to the "capacities" list of `subj`.

    The list is created the first time a capacity is recorded for the
    subject.
    """
    capacities = knowledge_base[subj].setdefault("capacities", [])
    capacities.append(capacity)
 # Dispatch table: maps the method name stored first in an example's
 # "parsed" tuple to the function that records that kind of fact.
 knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
 }
 def ingest(example):
    """Apply one example to the knowledge base.

    The first element of example['parsed'] selects the handler in
    knowledge_ingestion; the remaining elements are passed to it as
    positional arguments.
    """
    parsed = example['parsed']
    handler = knowledge_ingestion[parsed[0]]
    handler(*parsed[1:])
 # Feed every example into the knowledge base, then dump the result as
 # pretty-printed JSON with sorted keys.
 for example in examples:
    ingest(example)
 print(json.dumps(knowledge_base, indent=4, sort_keys=True))
--- a/parsing_test.py
+++ b/parsing_test.py
@ -0,0 +1,158 @@
 #!/usr/bin/env python
 import json
 from functools import reduce
 # Training sentences: each entry pairs the raw text with its parsed form,
 # a tuple holding the relation name followed by the words it relates.
 examples = [
    {"text": text, "parsed": parsed}
    for text, parsed in (
        ("icecream is cold",
         ("exists-property-with-value", "icecream", "cold")),
        ("lava is dangerous",
         ("exists-property-with-value", "lava", "dangerous")),
        ("earth is a planet",
         ("pertenence-to-group", "earth", "planet")),
        ("Green is a color",
         ("pertenence-to-group", "green", "color")),
        ("a plane can fly",
         ("has-capacity", "plane", "fly")),
        ("a wale can swim",
         ("has-capacity", "wale", "swim")),
    )
 ]
 # Known words and the groups each one belongs to; make_template copies
 # these groups into the templates it builds.
 knowledge_base = {
    word: {"groups": groups}
    for word, groups in (
        ("icecream", ["noun", "object", "comestible", "sweet"]),
        ("lava", ["noun", "object"]),
        ("earth", ["noun", "object", "planet"]),
        ("green", ["noun", "color", "concept"]),
        ("plane", ["noun", "object", "vehicle", "fast"]),
        ("car", ["noun", "object", "vehicle", "slow-ish"]),
        ("wale", ["noun", "object", "living-being"]),
        ("cold", ["property", "temperature"]),
        ("dangerous", ["property"]),
        ("planet", ["noun", "group"]),
        ("color", ["property", "group"]),
        ("fly", ["verb"]),
        ("swim", ["verb"]),
    )
 }
 def make_template(text, parsed):
    """Turn a sentence and its parsed form into a (tokens, template) pair.

    Every whitespace-separated word of `text` that also occurs in `parsed`
    is replaced in the token list by {'groups': <set of its groups from
    knowledge_base>}, and its first occurrence in the template (a list
    copy of `parsed`) is replaced by the word's position in the sentence.
    Words that do not occur in `parsed` stay as plain strings.

    Raises KeyError when a shared word is missing from knowledge_base.
    """
    words = text.split()
    slots = list(parsed)
    for position, word in enumerate(words):
        if word not in slots:
            continue
        # Debug trace of every substitution.
        print(word, position, slots)
        slots[slots.index(word)] = position
        words[position] = {'groups': set(knowledge_base[word]['groups'])}
    return words, slots
 def ingest(example):
    """Build the (tokens, template) pair for one example.

    The text is lower-cased before templating. After the template is
    built, the lower-cased text, the parsed tuple and a blank line are
    printed.
    """
    lowered = example["text"].lower()
    parsed = example["parsed"]
    tokens, template = make_template(lowered, parsed)
    # Same output as three separate print calls: text, parsed, blank line.
    print(lowered, parsed, "", sep="\n")
    return tokens, template
 # One (tokens, template) pair per training example, in example order.
 language_training = [ingest(example) for example in examples]
 def get_matching(sample, other):
    """Generalise the tokens of `sample` against same-shaped entries of `other`.

    `sample` and every entry of `other` are (tokens, template) pairs whose
    tokens are either literal strings or {'groups': set} dicts.

    An entry of `other` is kept when it has as many tokens as `sample` and,
    at every position where `sample` holds a groups dict, it also holds a
    groups dict sharing at least one group with it. Positions where
    `sample` holds a literal string are not compared.

    Returns a token list shaped like `sample`'s, where literals are kept
    and each groups dict is narrowed to the groups common to `sample` and
    all kept entries. Returns [] when no entry is kept.
    """
    tokens = sample[0]
    size = len(tokens)
    candidates = [entry for entry in other if len(entry[0]) == size]

    for position, token in enumerate(tokens):
        if not candidates:
            return []
        if isinstance(token, str):
            continue
        candidates = [
            entry for entry in candidates
            if not isinstance(entry[0][position], str)
            and entry[0][position]['groups'] & token['groups']
        ]

    # The last filter may have removed every candidate; without this check
    # the intersection below would run over nothing and raise TypeError.
    if not candidates:
        return []

    merged = []
    for position, token in enumerate(tokens):
        if isinstance(token, str):
            merged.append(token)
            continue
        shared = token['groups']
        for entry in candidates:
            # `&` builds a new set, so the inputs are never mutated.
            shared = shared & entry[0][position]['groups']
        merged.append({'groups': shared})
    return merged
 # Dump the raw templates, one per line, before generalising them.
 print('\n'.join(str(entry) for entry in language_training))
 print("--")
 # Generalise each template against all the others; a template for which
 # get_matching finds nothing is kept unchanged.
 pattern_examples = []
 for i, sample in enumerate(language_training):
    other = [entry for j, entry in enumerate(language_training) if j != i]
    print(sample)
    match = get_matching(sample, other)
    print("->", match)
    if match:
        sample = (match, sample[1])
    pattern_examples.append(sample)
    print()
 def get_fit(patterns, row):
    """Return the first (sample, ast) pattern that fits `row`, or None.

    A pattern fits when its sample has as many tokens as `row` and every
    token of the sample that is a string equals the word at the same
    position in `row`. Non-string tokens accept any word.
    """
    for sample, ast in patterns:
        if len(sample) != len(row):
            continue
        literals_agree = all(
            expected == actual
            for expected, actual in zip(sample, row)
            if isinstance(expected, str)
        )
        if literals_agree:
            return sample, ast
    return None
 # Try the derived patterns on two sentences that are not among the
 # training examples and print each sentence with the pattern found for
 # it (None when no pattern fits).
 for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
    row = test['text'].lower().split()
    fit = get_fit(pattern_examples, row)
    print(test['text'], fit)
 # while True:
 #     row = input('>>> ').lower().split()
 #     fit = get_fit(pattern_examples, row)
 #     if fit is None:
 #         print("No fit")
 #     else:
 #         print(fit)