Use a single object for both parts of the training.
This commit is contained in:
parent
a1925f5383
commit
5b30713df1
@ -1,63 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import json
|
|
||||||
import collections
|
|
||||||
|
|
||||||
examples = [
|
|
||||||
{
|
|
||||||
"text": "icecream is cold",
|
|
||||||
"parsed": ("exists-property-with-value", 'icecream', 'cold'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "earth is a planet",
|
|
||||||
"parsed": ("pertenence-to-group", 'earth', 'planet'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "Green is a color",
|
|
||||||
"parsed": ("pertenence-to-group", 'green', 'color'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "airplanes do fly",
|
|
||||||
"parsed": ("has-capacity", 'airplane', 'fly')
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
# word -> property -> value store; nested defaultdicts remove key-exists checks.
knowledge_base = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict()))
|
|
||||||
|
|
||||||
|
|
||||||
def property_for_value(value):
    """Return the name of the property that `value` is a value of.

    Only "cold" is known so far (a temperature); any other value yields
    None, matching the facts hard-coded in this script.
    """
    # Bug fix: the original tested `if "cold":`, a non-empty string literal
    # that is always truthy, so EVERY value was reported as a temperature.
    if value == "cold":
        return "temperature"
|
|
||||||
|
|
||||||
|
|
||||||
def exists_property_with_value(subj, value):
    # Record `value` for `subj` under the property that `value` belongs to
    # (uses the module-global `knowledge_base` and `property_for_value`).
    knowledge_base[subj][property_for_value(value)] = value
|
|
||||||
|
|
||||||
|
|
||||||
def pertenence_to_group(subj, group):
    # Record group membership ("earth is a planet") in the global store.
    knowledge_base[subj]["group"] = group
|
|
||||||
|
|
||||||
|
|
||||||
def has_capacity(subj, capacity):
    """Append `capacity` to `subj`'s capacity list in the global store,
    creating the list on first use."""
    # setdefault collapses the check-then-create dance into one call.
    knowledge_base[subj].setdefault("capacities", []).append(capacity)
|
|
||||||
|
|
||||||
|
|
||||||
# Dispatch table: parsed-form tag -> function that folds that kind of fact
# into the knowledge base.
knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
}
|
|
||||||
|
|
||||||
|
|
||||||
def ingest(example):
    """Route one example's parsed form to its ingestion handler."""
    tag, *arguments = example['parsed']
    knowledge_ingestion[tag](*arguments)
|
|
||||||
|
|
||||||
|
|
||||||
# Ingest every training example, then dump the resulting knowledge base.
for example in examples:
    ingest(example)

print(json.dumps(knowledge_base, indent=4, sort_keys=True))
|
|
139
naive-nlu/nlu.py
Normal file
139
naive-nlu/nlu.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
import collections
|
||||||
|
from functools import reduce
|
||||||
|
|
||||||
|
|
||||||
|
# # # # # # # # # # Base representation
|
||||||
|
|
||||||
|
# Immutable container bundling all trainer state — the "single object for
# both parts of the training" that every step threads through.
KnowledgeBase = collections.namedtuple('KnowledgeBase',
                                       [
                                           'examples',   # Language examples
                                           'knowledge',  # Knowledge about the world
                                           'trained',    # Generalized patterns derived from the examples
                                       ])
|
||||||
|
|
||||||
|
# # # # # # # # # # Interpretation
|
||||||
|
|
||||||
|
|
||||||
|
def property_for_value(knowledge_base, value):
    """Look up which property `value` is a value of (its 'as_property' entry)."""
    entry = knowledge_base[value]
    return entry['as_property']
|
||||||
|
|
||||||
|
|
||||||
|
def exists_property_with_value(knowledge_base, subj, value):
    # Record that `subj` has `value` for whatever property `value` denotes.
    # NOTE(review): other call sites pass the KnowledgeBase namedtuple, which
    # does not support string subscripting — confirm this is meant to receive
    # the inner `knowledge` dict instead.
    knowledge_base[subj][property_for_value(knowledge_base, value)] = value
|
||||||
|
|
||||||
|
|
||||||
|
def pertenence_to_group(knowledge_base, subj, group):
    """Record membership: `subj` belongs to `group`."""
    subject_entry = knowledge_base[subj]
    subject_entry["group"] = group
|
||||||
|
|
||||||
|
|
||||||
|
def has_capacity(knowledge_base, subj, capacity):
    """Append `capacity` to `subj`'s capacity list, creating the list on
    first use.

    Mutates `knowledge_base` in place; `knowledge_base[subj]` must already
    be a dict (or dict-like) entry.
    """
    # setdefault replaces the explicit "if key missing: create list" dance.
    knowledge_base[subj].setdefault("capacities", []).append(capacity)
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table: parsed-form tag -> handler that folds that fact into the
# knowledge base.
knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
}
|
||||||
|
|
||||||
|
|
||||||
|
def integrate_information(knowledge_base, example):
    """Dispatch one parsed example to its ingestion handler."""
    tag, *arguments = example['parsed']
    knowledge_ingestion[tag](knowledge_base, *arguments)
|
||||||
|
|
||||||
|
# # # # # # # # # # Parsing
|
||||||
|
|
||||||
|
|
||||||
|
def make_template(knowledge_base: KnowledgeBase, text, parsed):
    """Derive a (matcher, template) pair from a sentence and its parse.

    Each word that also appears in the parsed form is replaced, in the
    template, by its token index, and the token itself is abstracted into
    the set of knowledge groups the word belongs to.
    """
    tokens = text.split()
    template = list(parsed)
    for index, word in enumerate(tokens):
        if word not in template:
            continue
        print(word, index, template)
        template[template.index(word)] = index
        print(knowledge_base)
        tokens[index] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
    return tokens, template
|
||||||
|
|
||||||
|
|
||||||
|
def integrate_language(knowledge_base: KnowledgeBase, example):
    """Turn one training example into a (matcher, result) template pair."""
    lowered = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(knowledge_base, lowered, parsed)
    print(lowered)
    print(parsed)
    print()
    return matcher, result
|
||||||
|
|
||||||
|
|
||||||
|
def train(knowledge_base: KnowledgeBase, examples):
    """Ingest `examples` and return a new KnowledgeBase extended with them
    and with the generalized patterns recomputed."""
    # Parse every example into a template pair.
    parsed_examples = [integrate_language(knowledge_base, example)
                       for example in examples]

    # Generalize across all known examples.
    trained = reprocess_knowledge(knowledge_base, parsed_examples)

    return KnowledgeBase(
        examples=knowledge_base.examples + parsed_examples,
        knowledge=knowledge_base.knowledge,
        trained=trained,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def reprocess_knowledge(knowledge_base, examples):
    """Generalize each example against all the others.

    Every example is matched against the remaining ones; when a match is
    found, the example's matcher is replaced by the generalized pattern.
    """
    all_examples = knowledge_base.examples + examples

    print('\n'.join(map(str, knowledge_base.examples)))
    print("--")

    pattern_examples = []
    for position, sample in enumerate(all_examples):
        rest = all_examples[:position] + all_examples[position + 1:]
        print(sample)
        match = get_matching(sample, rest)
        print("->", match)
        if match:
            sample = (match, sample[1])
        pattern_examples.append(sample)
        print()
    return pattern_examples
|
||||||
|
|
||||||
|
|
||||||
|
def get_matching(sample, other):
    """Intersect `sample` with every same-length candidate in `other`.

    String positions are kept verbatim; group-set positions are narrowed to
    the groups shared by the sample and all surviving candidates.  Returns
    [] when no candidate survives the filtering.
    """
    length = len(sample[0])
    candidates = [entry for entry in other if len(entry[0]) == length]

    for position in range(length):
        if not candidates:
            return []
        element = sample[0][position]
        if not isinstance(element, str):
            candidates = [entry for entry in candidates
                          if not isinstance(entry[0][position], str)
                          and entry[0][position]['groups'] & element['groups']]

    merged = []
    for position in range(length):
        element = sample[0][position]
        if isinstance(element, str):
            merged.append(element)
        else:
            shared = reduce(lambda a, b: a & b,
                            map(lambda entry: entry[0][position]['groups'],
                                candidates))
            merged.append({'groups': element['groups'] & shared})
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def get_fit(knowledge: KnowledgeBase, row):
    """Return the first trained (sample, ast) pair compatible with `row`,
    or None.

    A pattern fits when it has the same length as `row` and every literal
    (string) position equals the corresponding word; abstracted positions
    accept any word.
    """
    for sample, ast in knowledge.trained:
        if len(sample) != len(row):
            continue
        literals_match = all(pattern == word
                             for pattern, word in zip(sample, row)
                             if isinstance(pattern, str))
        if literals_match:
            return sample, ast
    return None
|
88
naive-nlu/test.py
Normal file
88
naive-nlu/test.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
import nlu
|
||||||
|
|
||||||
|
examples = [
|
||||||
|
{
|
||||||
|
"text": "icecream is cold",
|
||||||
|
"parsed": ("exists-property-with-value", 'icecream', 'cold'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "lava is dangerous",
|
||||||
|
"parsed": ("exists-property-with-value", 'lava', 'dangerous')
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "earth is a planet",
|
||||||
|
"parsed": ("pertenence-to-group", 'earth', 'planet'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "Green is a color",
|
||||||
|
"parsed": ("pertenence-to-group", 'green', 'color'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "a plane can fly",
|
||||||
|
"parsed": ("has-capacity", 'plane', 'fly')
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "a wale can swim",
|
||||||
|
"parsed": ("has-capacity", 'wale', 'swim')
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
base_knowledge = {
|
||||||
|
'icecream': {
|
||||||
|
"groups": ['noun', 'object', 'comestible', 'sweet'],
|
||||||
|
},
|
||||||
|
'lava': {
|
||||||
|
"groups": ['noun', 'object'],
|
||||||
|
},
|
||||||
|
'earth': {
|
||||||
|
"groups": ['noun', 'object', 'planet'],
|
||||||
|
},
|
||||||
|
'green': {
|
||||||
|
"groups": ['noun', 'color', 'concept'],
|
||||||
|
},
|
||||||
|
'plane': {
|
||||||
|
"groups": ['noun', 'object', 'vehicle', 'fast'],
|
||||||
|
},
|
||||||
|
'car': {
|
||||||
|
"groups": ['noun', 'object', 'vehicle', 'slow-ish'],
|
||||||
|
},
|
||||||
|
'wale': {
|
||||||
|
"groups": ['noun', 'object', 'living-being']
|
||||||
|
},
|
||||||
|
'cold': {
|
||||||
|
"groups": ['property', 'temperature'],
|
||||||
|
},
|
||||||
|
'dangerous': {
|
||||||
|
"groups": ['property'],
|
||||||
|
},
|
||||||
|
'planet': {
|
||||||
|
"groups": ['noun', 'group'],
|
||||||
|
},
|
||||||
|
'color': {
|
||||||
|
"groups": ['property', 'group'],
|
||||||
|
},
|
||||||
|
'fly': {
|
||||||
|
"groups": ['verb'],
|
||||||
|
},
|
||||||
|
'swim': {
|
||||||
|
"groups": ['verb'],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    # Start from an empty example/pattern store seeded with the hand-written
    # word -> groups knowledge.
    knowledge = nlu.KnowledgeBase(
        examples=[],
        trained=[],
        knowledge=base_knowledge
    )

    knowledge = nlu.train(knowledge, examples)
    # Try to fit two unseen sentences against the trained patterns.
    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
        row = test['text'].lower().split()
        fit = nlu.get_fit(knowledge, row)
        print(test['text'], fit)
|
158
parsing_test.py
158
parsing_test.py
@ -1,158 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import json
|
|
||||||
from functools import reduce
|
|
||||||
|
|
||||||
examples = [
|
|
||||||
{
|
|
||||||
"text": "icecream is cold",
|
|
||||||
"parsed": ("exists-property-with-value", 'icecream', 'cold'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "lava is dangerous",
|
|
||||||
"parsed": ("exists-property-with-value", 'lava', 'dangerous')
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "earth is a planet",
|
|
||||||
"parsed": ("pertenence-to-group", 'earth', 'planet'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "Green is a color",
|
|
||||||
"parsed": ("pertenence-to-group", 'green', 'color'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "a plane can fly",
|
|
||||||
"parsed": ("has-capacity", 'plane', 'fly')
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "a wale can swim",
|
|
||||||
"parsed": ("has-capacity", 'wale', 'swim')
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
knowledge_base = {
|
|
||||||
'icecream': {
|
|
||||||
"groups": ['noun', 'object', 'comestible', 'sweet'],
|
|
||||||
},
|
|
||||||
'lava': {
|
|
||||||
"groups": ['noun', 'object'],
|
|
||||||
},
|
|
||||||
'earth': {
|
|
||||||
"groups": ['noun', 'object', 'planet'],
|
|
||||||
},
|
|
||||||
'green': {
|
|
||||||
"groups": ['noun', 'color', 'concept'],
|
|
||||||
},
|
|
||||||
'plane': {
|
|
||||||
"groups": ['noun', 'object', 'vehicle', 'fast'],
|
|
||||||
},
|
|
||||||
'car': {
|
|
||||||
"groups": ['noun', 'object', 'vehicle', 'slow-ish'],
|
|
||||||
},
|
|
||||||
'wale': {
|
|
||||||
"groups": ['noun', 'object', 'living-being']
|
|
||||||
},
|
|
||||||
'cold': {
|
|
||||||
"groups": ['property', 'temperature'],
|
|
||||||
},
|
|
||||||
'dangerous': {
|
|
||||||
"groups": ['property'],
|
|
||||||
},
|
|
||||||
'planet': {
|
|
||||||
"groups": ['noun', 'group'],
|
|
||||||
},
|
|
||||||
'color': {
|
|
||||||
"groups": ['property', 'group'],
|
|
||||||
},
|
|
||||||
'fly': {
|
|
||||||
"groups": ['verb'],
|
|
||||||
},
|
|
||||||
'swim': {
|
|
||||||
"groups": ['verb'],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def make_template(text, parsed):
    """Build a (tokens, template) pair from a sentence and its parse.

    Words shared between the text and the parsed form become positional
    indices in the template, and the matching token is abstracted to its
    knowledge groups (via the module-global `knowledge_base`).
    """
    tokens = text.split()
    template = list(parsed)
    for i in range(len(tokens)):
        word = tokens[i]
        if word in template:
            print(word, i, template)  # debug trace of the substitution
            template[template.index(word)] = i
            # abstract the literal word into its semantic groups
            tokens[i] = {'groups': set(knowledge_base[word]['groups'])}
    return tokens, template
|
|
||||||
|
|
||||||
|
|
||||||
def ingest(example):
    """Lower-case the example text and derive its matcher/result pair."""
    lowered = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(lowered, parsed)
    print(lowered)
    print(parsed)
    print()
    return matcher, result
|
|
||||||
|
|
||||||
|
|
||||||
language_training = []
|
|
||||||
for example in examples:
|
|
||||||
language_training.append(ingest(example))
|
|
||||||
|
|
||||||
|
|
||||||
def get_matching(sample, other):
    """Intersect `sample` with every same-length candidate in `other`.

    String positions are kept verbatim; group-set positions are narrowed to
    the groups shared by the sample and all surviving candidates.  Returns
    [] when no candidate survives the filtering.
    """
    length = len(sample[0])
    candidates = [entry for entry in other if len(entry[0]) == length]

    for position in range(length):
        if not candidates:
            return []
        element = sample[0][position]
        if not isinstance(element, str):
            candidates = [entry for entry in candidates
                          if not isinstance(entry[0][position], str)
                          and entry[0][position]['groups'] & element['groups']]

    merged = []
    for position in range(length):
        element = sample[0][position]
        if isinstance(element, str):
            merged.append(element)
        else:
            shared = reduce(lambda a, b: a & b,
                            map(lambda entry: entry[0][position]['groups'],
                                candidates))
            merged.append({'groups': element['groups'] & shared})
    return merged
|
|
||||||
|
|
||||||
|
|
||||||
print('\n'.join(map(str, language_training)))
|
|
||||||
print("--")
|
|
||||||
pattern_examples = []
|
|
||||||
for i, sample in enumerate(language_training):
|
|
||||||
other = language_training[:i] + language_training[i + 1:]
|
|
||||||
print(sample)
|
|
||||||
match = get_matching(sample, other)
|
|
||||||
print("->", match)
|
|
||||||
if len(match) > 0:
|
|
||||||
sample = (match, sample[1],)
|
|
||||||
pattern_examples.append(sample)
|
|
||||||
print()
|
|
||||||
|
|
||||||
def get_fit(patterns, row):
    """Return the first (sample, ast) pattern compatible with `row`, or None.

    A pattern fits when it has the same length as `row` and every literal
    (string) position equals the corresponding word; abstracted positions
    accept any word.
    """
    for sample, ast in patterns:
        if len(sample) != len(row):
            continue
        literals_match = all(pattern == word
                             for pattern, word in zip(sample, row)
                             if isinstance(pattern, str))
        if literals_match:
            return sample, ast
    return None
|
|
||||||
|
|
||||||
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
|
|
||||||
row = test['text'].lower().split()
|
|
||||||
fit = get_fit(pattern_examples, row)
|
|
||||||
print(test['text'], fit)
|
|
||||||
|
|
||||||
# while True:
|
|
||||||
# row = input('>>> ').lower().split()
|
|
||||||
# fit = get_fit(pattern_examples, row)
|
|
||||||
# if fit is None:
|
|
||||||
# print("No fit")
|
|
||||||
# else:
|
|
||||||
# print(fit)
|
|
Loading…
Reference in New Issue
Block a user