From 5b30713df177eb6758021e7da76c7097fa0a6c0a Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Thu, 11 May 2017 01:05:07 +0200 Subject: [PATCH] Use a single object for both parts of the training. --- absorption-test.py | 63 ------------------ naive-nlu/nlu.py | 139 +++++++++++++++++++++++++++++++++++++++ naive-nlu/test.py | 88 +++++++++++++++++++++++++ parsing_test.py | 158 --------------------------------------------- 4 files changed, 227 insertions(+), 221 deletions(-) delete mode 100644 absorption-test.py create mode 100644 naive-nlu/nlu.py create mode 100644 naive-nlu/test.py delete mode 100644 parsing_test.py diff --git a/absorption-test.py b/absorption-test.py deleted file mode 100644 index 023d742..0000000 --- a/absorption-test.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -import json -import collections - -examples = [ - { - "text": "icecream is cold", - "parsed": ("exists-property-with-value", 'icecream', 'cold'), - }, - { - "text": "earth is a planet", - "parsed": ("pertenence-to-group", 'earth', 'planet'), - }, - { - "text": "Green is a color", - "parsed": ("pertenence-to-group", 'green', 'color'), - }, - { - "text": "airplanes do fly", - "parsed": ("has-capacity", 'airplane', 'fly') - } -] - -knowledge_base = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict())) - - -def property_for_value(value): - if "cold": - return "temperature" - - -def exists_property_with_value(subj, value): - knowledge_base[subj][property_for_value(value)] = value - - -def pertenence_to_group(subj, group): - knowledge_base[subj]["group"] = group - - -def has_capacity(subj, capacity): - if "capacities" not in knowledge_base[subj]: - knowledge_base[subj]["capacities"] = [] - knowledge_base[subj]["capacities"].append(capacity) - - -knowledge_ingestion = { - "exists-property-with-value": exists_property_with_value, - "pertenence-to-group": pertenence_to_group, - "has-capacity": has_capacity, -} - - -def ingest(example): - method = example['parsed'][0] - args = example['parsed'][1:] - knowledge_ingestion[method](*args) - - -for example in examples: - ingest(example) - -print(json.dumps(knowledge_base, indent=4, sort_keys=True)) diff --git a/naive-nlu/nlu.py b/naive-nlu/nlu.py new file mode 100644 index 0000000..4e2f208 --- /dev/null +++ b/naive-nlu/nlu.py @@ -0,0 +1,139 @@ +import collections +from functools import reduce + + +# # # # # # # # # # Base representation + +KnowledgeBase = collections.namedtuple('KnowledgeBase', + [ + 'examples', # Language examples + 'knowledge', # Knowledge about the world + 'trained', + ]) + +# # # # # # # # # # Interpretation + + +def property_for_value(knowledge_base, value): + return knowledge_base[value]['as_property'] + + +def exists_property_with_value(knowledge_base, subj, value): + + knowledge_base[subj][property_for_value(knowledge_base, value)] = value + + +def pertenence_to_group(knowledge_base, subj, group): + knowledge_base[subj]["group"] = group + + +def has_capacity(knowledge_base, subj, capacity): + if "capacities" not in knowledge_base[subj]: + knowledge_base[subj]["capacities"] = [] + knowledge_base[subj]["capacities"].append(capacity) + + +knowledge_ingestion = { + "exists-property-with-value": exists_property_with_value, + "pertenence-to-group": pertenence_to_group, + "has-capacity": has_capacity, +} + + +def integrate_information(knowledge_base, example): + method = example['parsed'][0] + args = example['parsed'][1:] + knowledge_ingestion[method](knowledge_base, *args) + +# # # # # # # # # # Parsing + + +def make_template(knowledge_base: KnowledgeBase, text, parsed): + tokens = text.split() + template = list(parsed) + for i in range(len(tokens)): + word = tokens[i] + if word in template: + print(word, i, template) + template[template.index(word)] = i + print(knowledge_base) + tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])} + return tokens, template + + +def integrate_language(knowledge_base: KnowledgeBase, example): + text = example["text"].lower() + parsed = example["parsed"] + matcher, result = make_template(knowledge_base, text, parsed) + print(text) + print(parsed) + print() + return matcher, result + + +def train(knowledge_base: KnowledgeBase, examples): + + # Parse everything + parsed_examples = [] + for example in examples: + parsed_examples.append(integrate_language(knowledge_base, example)) + + # Reduce values + trained = reprocess_knowledge(knowledge_base, parsed_examples) + + return KnowledgeBase( + knowledge=knowledge_base.knowledge, + examples=knowledge_base.examples + parsed_examples, + trained=trained, + ) + + +def reprocess_knowledge(knowledge_base, examples): + examples = knowledge_base.examples + examples + + print('\n'.join(map(str, knowledge_base.examples))) + print("--") + + pattern_examples = [] + for i, sample in enumerate(examples): + other = examples[:i] + examples[i + 1:] + print(sample) + match = get_matching(sample, other) + print("->", match) + if len(match) > 0: + sample = (match, sample[1],) + pattern_examples.append(sample) + print() + return pattern_examples + + +def get_matching(sample, other): + l = len(sample[0]) + other = list(filter(lambda x: len(x[0]) == l, other)) + for i in range(l): + if len(other) == 0: + return [] + + if not isinstance(sample[0][i], str): + other = list(filter(lambda x: not isinstance(x[0][i], str) and + len(x[0][i]['groups'] & sample[0][i]['groups']) > 0, + other)) + + return [sample[0][x] if isinstance(sample[0][x], str) + else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b, + map(lambda y: y[0][x]['groups'], + other))} + for x + in range(l)] + + +def get_fit(knowledge: KnowledgeBase, row): + for sample, ast in knowledge.trained: + if len(sample) != len(row): + continue + + if all(map(lambda x: not isinstance(sample[x], str) or sample[x] == row[x], + range(len(sample)))): + return sample, ast + else: + return None diff --git a/naive-nlu/test.py b/naive-nlu/test.py new file mode 100644 index 0000000..e44cba3 --- /dev/null +++ b/naive-nlu/test.py @@ -0,0 +1,88 @@ +import nlu + +examples = [ + { + "text": "icecream is cold", + "parsed": ("exists-property-with-value", 'icecream', 'cold'), + }, + { + "text": "lava is dangerous", + "parsed": ("exists-property-with-value", 'lava', 'dangerous') + }, + { + "text": "earth is a planet", + "parsed": ("pertenence-to-group", 'earth', 'planet'), + }, + { + "text": "Green is a color", + "parsed": ("pertenence-to-group", 'green', 'color'), + }, + { + "text": "a plane can fly", + "parsed": ("has-capacity", 'plane', 'fly') + }, + { + "text": "a wale can swim", + "parsed": ("has-capacity", 'wale', 'swim') + } +] + +base_knowledge = { + 'icecream': { + "groups": ['noun', 'object', 'comestible', 'sweet'], + }, + 'lava': { + "groups": ['noun', 'object'], + }, + 'earth': { + "groups": ['noun', 'object', 'planet'], + }, + 'green': { + "groups": ['noun', 'color', 'concept'], + }, + 'plane': { + "groups": ['noun', 'object', 'vehicle', 'fast'], + }, + 'car': { + "groups": ['noun', 'object', 'vehicle', 'slow-ish'], + }, + 'wale': { + "groups": ['noun', 'object', 'living-being'] + }, + 'cold': { + "groups": ['property', 'temperature'], + }, + 'dangerous': { + "groups": ['property'], + }, + 'planet': { + "groups": ['noun', 'group'], + }, + 'color': { + "groups": ['property', 'group'], + }, + 'fly': { + "groups": ['verb'], + }, + 'swim': { + "groups": ['verb'], + }, +} + + +def main(): + knowledge = nlu.KnowledgeBase( + examples=[], + trained=[], + knowledge=base_knowledge + ) + + knowledge = nlu.train(knowledge, examples) + for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: + row = test['text'].lower().split() + fit = nlu.get_fit(knowledge, row) + print(test['text'], fit) + + +if __name__ == '__main__': + main() diff --git a/parsing_test.py b/parsing_test.py deleted file mode 100644 index 74f011b..0000000 --- a/parsing_test.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python - -import json -from functools import reduce - -examples = [ - { - "text": "icecream is cold", - "parsed": ("exists-property-with-value", 'icecream', 'cold'), - }, - { - "text": "lava is dangerous", - "parsed": ("exists-property-with-value", 'lava', 'dangerous') - }, - { - "text": "earth is a planet", - "parsed": ("pertenence-to-group", 'earth', 'planet'), - }, - { - "text": "Green is a color", - "parsed": ("pertenence-to-group", 'green', 'color'), - }, - { - "text": "a plane can fly", - "parsed": ("has-capacity", 'plane', 'fly') - }, - { - "text": "a wale can swim", - "parsed": ("has-capacity", 'wale', 'swim') - } -] - -knowledge_base = { - 'icecream': { - "groups": ['noun', 'object', 'comestible', 'sweet'], - }, - 'lava': { - "groups": ['noun', 'object'], - }, - 'earth': { - "groups": ['noun', 'object', 'planet'], - }, - 'green': { - "groups": ['noun', 'color', 'concept'], - }, - 'plane': { - "groups": ['noun', 'object', 'vehicle', 'fast'], - }, - 'car': { - "groups": ['noun', 'object', 'vehicle', 'slow-ish'], - }, - 'wale': { - "groups": ['noun', 'object', 'living-being'] - }, - 'cold': { - "groups": ['property', 'temperature'], - }, - 'dangerous': { - "groups": ['property'], - }, - 'planet': { - "groups": ['noun', 'group'], - }, - 'color': { - "groups": ['property', 'group'], - }, - 'fly': { - "groups": ['verb'], - }, - 'swim': { - "groups": ['verb'], - }, -} - - -def make_template(text, parsed): - tokens = text.split() - template = list(parsed) - for i in range(len(tokens)): - word = tokens[i] - if word in template: - print(word, i, template) - template[template.index(word)] = i - tokens[i] = {'groups': set(knowledge_base[word]['groups'])} - return tokens, template - - -def ingest(example): - text = example["text"].lower() - parsed = example["parsed"] - matcher, result = make_template(text, parsed) - print(text) - print(parsed) - print() - return matcher, result - - -language_training = [] -for example in examples: - language_training.append(ingest(example)) - - -def get_matching(sample, other): - l = len(sample[0]) - other = list(filter(lambda x: len(x[0]) == l, other)) - for i in range(l): - if len(other) == 0: - return [] - - if not isinstance(sample[0][i], str): - other = list(filter(lambda x: not isinstance(x[0][i], str) and - len(x[0][i]['groups'] & sample[0][i]['groups']) > 0, - other)) - - return [sample[0][x] if isinstance(sample[0][x], str) - else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b, - map(lambda y: y[0][x]['groups'], - other))} - for x - in range(l)] - - -print('\n'.join(map(str, language_training))) -print("--") -pattern_examples = [] -for i, sample in enumerate(language_training): - other = language_training[:i] + language_training[i + 1:] - print(sample) - match = get_matching(sample, other) - print("->", match) - if len(match) > 0: - sample = (match, sample[1],) - pattern_examples.append(sample) - print() - -def get_fit(patterns, row): - for sample, ast in patterns: - if len(sample) != len(row): - continue - - if all(map(lambda x: not isinstance(sample[x], str) or sample[x] == row[x], - range(len(sample)))): - return sample, ast - else: - return None - -for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: - row = test['text'].lower().split() - fit = get_fit(pattern_examples, row) - print(test['text'], fit) - -# while True: -# row = input('>>> ').lower().split() -# fit = get_fit(pattern_examples, row) -# if fit is None: -# print("No fit") -# else: -# print(fit)