From a94fd31af18fdab0069e138a74c3b6367308a879 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Thu, 11 May 2017 21:13:27 +0200 Subject: [PATCH 1/6] Add base test. --- naive-nlu/knowledge_base.py | 2 +- naive-nlu/test.py | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index 081de2c..3368df8 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -60,7 +60,7 @@ class KnowledgeBase(object): knowledge_diff_getter = lambda: diff_knowledge(knowledge_before, knowledge_after) - return result, knowledge_diff_getter + return result, inferred_tree, knowledge_diff_getter def act_upon(self, result): if isinstance(result, ModifiableProperty): diff --git a/naive-nlu/test.py b/naive-nlu/test.py index 5030715..b42b55d 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -7,14 +7,26 @@ examples = [ "text": "icecream is cold", "parsed": ("exists-property-with-value", 'icecream', 'cold'), }, + { + "text": "is icecream cold?", + "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) + }, { "text": "lava is dangerous", "parsed": ("exists-property-with-value", 'lava', 'dangerous') }, + { + "text": "is lava dangerous?", + "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), + }, { "text": "earth is a planet", "parsed": ("pertenence-to-group", 'earth', 'planet'), }, + { + "text": "is earth a moon?", + "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), + }, { "text": "Green is a color", "parsed": ("pertenence-to-group", 'green', 'color'), @@ -26,7 +38,7 @@ examples = [ { "text": "a wale can swim", "parsed": ("has-capacity", 'wale', 'swim') - } + }, ] base_knowledge = { @@ -78,10 +90,7 @@ def test_assumption(expectedResponse, knowledge, query): print("Query: {}".format(query['text'])) print("Expected: {}".format(expectedResponse)) - import knowledge_evaluation - result = knowledge_evaluation.integrate_information( - knowledge.knowledge, - query) + result, abstract_tree, diff = knowledge.process(query['text']) print("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if result != expectedResponse else "2", result)) @@ -97,7 +106,7 @@ def main(): for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: row = test['text'] - result, differences = knowledge.process(row) + result, inferred_tree, differences = knowledge.process(row) print("result:", result) print(differences()) From 0e192407204f40a230c91a42b403da0e25ad335f Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Sat, 13 May 2017 20:28:11 +0200 Subject: [PATCH 2/6] =?UTF-8?q?Building=20base=20(extremely=20na=C3=AFve)?= =?UTF-8?q?=20subgraph=20remix=20matrix.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- naive-nlu/knowledge_base.py | 9 +- naive-nlu/parsing.py | 173 ++++++++++++++++++++++++++++++++++-- 2 files changed, 171 insertions(+), 11 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index 3368df8..310d6f7 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -30,13 +30,10 @@ class KnowledgeBase(object): "parsed": inferred_tree, }) self.act_upon(result) - parsed_examples.append((decomposition, inferred_tree)) + self.examples.append((decomposition, inferred_tree)) - # Reduce values - trained = parsing.reprocess_language_knowledge(self, parsed_examples) - - self.examples += parsed_examples - self.trained = trained + # Reduce values + self.trained = parsing.reprocess_language_knowledge(self, self.examples) knowledge_after = copy.deepcopy(self.knowledge) knowledge_diff_getter = lambda: diff_knowledge(knowledge_before, diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index f0df874..de57d64 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -1,10 +1,12 @@ #!/usr/bin/env python +import knowledge_evaluation + +import re from functools import reduce - def make_template(knowledge_base, text, parsed): - tokens = text.split() + tokens = re.findall(r'(\w+|[^\s])', text) matcher = list(tokens) template = list(parsed) for i in range(len(matcher)): @@ -17,16 +19,177 @@ def make_template(knowledge_base, text, parsed): return tokens, matcher, template +def is_bottom_level(tree): + for element in tree: + if isinstance(element, list) or isinstance(element, tuple): + return False + return True + + +def get_lower_levels(parsed): + lower = [] + def aux(subtree, top_level): + nonlocal lower + deeper = top_level + for element in subtree: + if isinstance(element, list) or isinstance(element, tuple): + aux(element, top_level=False) + deeper = True + + if not deeper: + lower.append(subtree) + + aux(parsed, top_level=True) + return lower + + def integrate_language(knowledge_base, example): text = example["text"].lower() parsed = example["parsed"] + + print("P:", parsed) + while True: + lower_levels = get_lower_levels(parsed) + print("Lower:", lower_levels) + if len(lower_levels) == 0: + break + + for atom in lower_levels: + print("\x1b[1mSelecting\x1b[0m:", atom) + similar = get_similar_tree(knowledge_base, atom) + print("___>", similar) + remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, text, atom, similar) + tokens, matcher, result = make_template(knowledge_base, text, atom) + print("Tx:", tokens) + print("Mx:", matcher) + print("Rx:", result) + print("#########") + + break + tokens, matcher, result = make_template(knowledge_base, text, parsed) - print(text) - print(parsed) + print("T:", tokens) + print("M:", matcher) + print("R:", result) print() return tokens, matcher, result +def build_remix_matrix(knowledge_base, text, atom, similar): + # print("+" * 20) + + tokens, matcher, result = make_template(knowledge_base, text, atom) + similar_matcher, similar_result, similar_result_resolved, _ = similar + + # print("NEW:") + # print("Tokens:", tokens) + # print("Matcher:", matcher) + # print("Result:", result) + # print() + # print("Similar:") + # print("Matcher:", similar_matcher) + # print("Result:", similar_result) + + start_bounds, end_bounds = find_bounds(matcher, similar_matcher) + # print() + # print("Bounds:") + # print("Start:", start_bounds) + # print("End: ", end_bounds) + + for i, element in (end_bounds + start_bounds[::-1]): + matcher.pop(i) + tokens.pop(i) + + possible_remixes = get_possible_remixes(matcher, similar_matcher) + chosen_remix = possible_remixes[0] + + # print("New tokens:", tokens) + # print("-" * 20) + return chosen_remix, (start_bounds, end_bounds) + + +def get_possible_remixes(matcher, similar_matcher): + # print("*" * 20) + # print(matcher) + # print(similar_matcher) + + matrix = [] + for element in matcher: + assert(element in similar_matcher) + indexes = all_indexes(similar_matcher, element) + matrix.append(indexes) + + # print(matrix) + # print([list(x) for x in list(zip(*matrix))]) + # TODO: do some scoring to find the most "interesting combination" + return [list(x) for x in list(zip(*matrix))] + + +def all_indexes(collection, element): + indexes = [] + base = 0 + + for _ in range(collection.count(element)): + i = collection.index(element, base) + base = i + 1 + indexes.append(i) + + return indexes + + +def find_bounds(matcher, similar_matcher): + start_bounds = [] + for i, element in enumerate(matcher): + if element in similar_matcher: + break + else: + start_bounds.append((i, element)) + + end_bounds = [] + for i, element in enumerate(matcher[::-1]): + if element in similar_matcher: + break + else: + end_bounds.append((len(matcher) - (i + 1), element)) + + return start_bounds, end_bounds + + +def get_similar_tree(knowledge_base, atom): + possibilities = [] + + # Find matching possibilities + for entry, tree in knowledge_base.trained: + if not is_bottom_level(tree): + continue + if tree[0] == atom[0]: + possibilities.append((entry, tree)) + + # Sort by more matching elements + sorted_possibilities = [] + for (raw, possibility) in possibilities: + resolved = [] + for element in atom: + if isinstance(element, str): + resolved.append(element) + else: + resolved.append(knowledge_evaluation.resolve( + knowledge_base.knowledge, + element, + raw)) + + # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element + score = sum([resolved[i] == atom[i] + for i + in range(min(len(resolved), + len(atom)))]) + sorted_possibilities.append((raw, possibility, resolved, score)) + sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True) + if len(sorted_possibilities) < 1: + return None + + return sorted_possibilities[0] + def get_matching(sample, other): l = len(sample[0]) other = list(filter(lambda x: len(x[0]) == l, other)) @@ -56,13 +219,13 @@ def reprocess_language_knowledge(knowledge_base, examples): pattern_examples = [] for i, sample in enumerate(examples): other = examples[:i] + examples[i + 1:] - print(sample) match = get_matching(sample, other) print("->", match) if len(match) > 0: sample = (match, sample[1],) pattern_examples.append(sample) print() + print("\x1b[7m--\x1b[0m") return pattern_examples From c6eaf056aa8717e2cc52ce4e90bd41195e6605c9 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Sat, 13 May 2017 20:28:27 +0200 Subject: [PATCH 3/6] Comment tests outside current development scope. --- naive-nlu/test.py | 88 +++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/naive-nlu/test.py b/naive-nlu/test.py index b42b55d..fe5b65b 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -11,34 +11,34 @@ examples = [ "text": "is icecream cold?", "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) }, - { - "text": "lava is dangerous", - "parsed": ("exists-property-with-value", 'lava', 'dangerous') - }, - { - "text": "is lava dangerous?", - "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), - }, - { - "text": "earth is a planet", - "parsed": ("pertenence-to-group", 'earth', 'planet'), - }, - { - "text": "is earth a moon?", - "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), - }, - { - "text": "Green is a color", - "parsed": ("pertenence-to-group", 'green', 'color'), - }, - { - "text": "a plane can fly", - "parsed": ("has-capacity", 'plane', 'fly') - }, - { - "text": "a wale can swim", - "parsed": ("has-capacity", 'wale', 'swim') - }, + # { + # "text": "lava is dangerous", + # "parsed": ("exists-property-with-value", 'lava', 'dangerous') + # }, + # { + # "text": "is lava dangerous?", + # "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), + # }, + # { + # "text": "earth is a planet", + # "parsed": ("pertenence-to-group", 'earth', 'planet'), + # }, + # { + # "text": "is earth a moon?", + # "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), + # }, + # { + # "text": "Green is a color", + # "parsed": ("pertenence-to-group", 'green', 'color'), + # }, + # { + # "text": "a plane can fly", + # "parsed": ("has-capacity", 'plane', 'fly') + # }, + # { + # "text": "a wale can swim", + # "parsed": ("has-capacity", 'wale', 'swim') + # }, ] base_knowledge = { @@ -100,25 +100,25 @@ def main(): ) differences = knowledge.train(examples) - print("----") - print(differences()) - print("----") + # print("----") + # print(differences()) + # print("----") - for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: - row = test['text'] - result, inferred_tree, differences = knowledge.process(row) + # for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: + # row = test['text'] + # result, inferred_tree, differences = knowledge.process(row) - print("result:", result) - print(differences()) - print() - print('-----') - print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) - print('-----') - queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) } - queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) } + # print("result:", result) + # print(differences()) + # print() + # print('-----') + # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) + # print('-----') + # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) } + # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) } - test_assumption(True, knowledge, queryTrue) - test_assumption(False, knowledge, queryFalse) + # test_assumption(True, knowledge, queryTrue) + # test_assumption(False, knowledge, queryFalse) if __name__ == '__main__': main() From 099af2a8156dbd6404634e15e07d85d0d8a98365 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Sat, 13 May 2017 20:33:09 +0200 Subject: [PATCH 4/6] Add remix application sample. --- naive-nlu/parsing.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index de57d64..4e4b179 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -63,6 +63,10 @@ def integrate_language(knowledge_base, example): print("Tx:", tokens) print("Mx:", matcher) print("Rx:", result) + print("Remix:", remix) + after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) + assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) + print(" \\->", after_remix) print("#########") break @@ -75,6 +79,13 @@ def integrate_language(knowledge_base, example): return tokens, matcher, result +def apply_remix(tokens, remix): + rebuilt = [] + for i in remix: + rebuilt.append(tokens[i]) + return rebuilt + + def build_remix_matrix(knowledge_base, text, atom, similar): # print("+" * 20) From 5f6b067e17ed73806e15229fcae470dea8fc7094 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Mon, 15 May 2017 16:51:39 +0200 Subject: [PATCH 5/6] Add learning phase to shallow (1 level) nested structures. --- naive-nlu/knowledge_base.py | 10 ++++ naive-nlu/knowledge_evaluation.py | 34 +++++++++++++- naive-nlu/parsing.py | 78 ++++++++++++++++++++++++------- naive-nlu/test.py | 16 ++++--- 4 files changed, 111 insertions(+), 27 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index 310d6f7..16845cc 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -22,6 +22,7 @@ class KnowledgeBase(object): # Parse everything parsed_examples = [] for example in examples: + print("\x1b[7;32m> {} \x1b[0m".format(example)) tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) print(tokens) result = knowledge_evaluation.integrate_information(self.knowledge, { @@ -29,7 +30,10 @@ class KnowledgeBase(object): "decomposition": decomposition, "parsed": inferred_tree, }) + + print("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result))) self.act_upon(result) + print("\x1b[7;34m< {} \x1b[0m".format(self.get_value(result))) self.examples.append((decomposition, inferred_tree)) # Reduce values @@ -59,6 +63,12 @@ class KnowledgeBase(object): return result, inferred_tree, knowledge_diff_getter + def get_value(self, result): + if isinstance(result, ModifiableProperty): + return result.getter() + else: + return result + def act_upon(self, result): if isinstance(result, ModifiableProperty): result.setter() diff --git a/naive-nlu/knowledge_evaluation.py b/naive-nlu/knowledge_evaluation.py index c705964..fb717f3 100644 --- a/naive-nlu/knowledge_evaluation.py +++ b/naive-nlu/knowledge_evaluation.py @@ -12,9 +12,33 @@ def resolve(knowledge_base, elements, value): return value +# TODO: improve typing +def infer_type(result): + if isinstance(result, bool): + return "bool" + elif isinstance(result, int): + return "int" + + else: + raise Exception("Unknown type for value: {}".format(result)) + + +def get_subquery_type(knowledge_base, atom): + subquery_result = integrate_information(knowledge_base, + { + "parsed": atom, + "elements": [], + }) + assert (subquery_result is not None) + result = subquery_result.getter() + + result_type = infer_type(result) + return result_type + + def property_for_value(knowledge_base, value): - print(value) - print(knowledge_base[value]) + # print(value) + # print(knowledge_base[value]) return knowledge_base[value]['as_property'] @@ -27,6 +51,11 @@ def modifiable_property_from_property(prop, path, value): nonlocal prop, path, value prop[path] = value + return ModifiableProperty( + getter=getter, + setter=setter, + ) + def exists_property_with_value(knowledge_base, elements, subj, value): subj = resolve(knowledge_base, elements, subj) @@ -50,6 +79,7 @@ def modifiable_element_for_existance_in_set(container, set_name, element): def setter(): nonlocal container, set_name, element return container[set_name].add(element) + return ModifiableProperty( getter=getter, setter=setter, diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index 4e4b179..75ea3f5 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -3,10 +3,16 @@ import knowledge_evaluation import re +import copy from functools import reduce -def make_template(knowledge_base, text, parsed): - tokens = re.findall(r'(\w+|[^\s])', text) + +# TODO: more flexible tokenization +def to_tokens(text): + return re.findall(r'(\w+|[^\s])', text) + + +def make_template(knowledge_base, tokens, parsed): matcher = list(tokens) template = list(parsed) for i in range(len(matcher)): @@ -28,50 +34,85 @@ def is_bottom_level(tree): def get_lower_levels(parsed): lower = [] - def aux(subtree, top_level): + def aux(subtree, path): nonlocal lower - deeper = top_level - for element in subtree: + deeper = len(path) == 0 + for i, element in enumerate(subtree): if isinstance(element, list) or isinstance(element, tuple): - aux(element, top_level=False) + aux(element, path + (i,)) deeper = True if not deeper: - lower.append(subtree) + lower.append((path, subtree)) - aux(parsed, top_level=True) + aux(parsed, path=()) return lower +# TODO: probably optimize this, it creates lots of unnecessary tuples +def replace_position(tree, position, new_element): + + def aux(current_tree, remaining_route): + if len(remaining_route) == 0: + return new_element + + else: + step = remaining_route[0] + return ( + tree[:step] + + (aux(tree[step], remaining_route[1:]),) + + tree[step + 2:] + ) + + return aux(tree, position) + + def integrate_language(knowledge_base, example): text = example["text"].lower() parsed = example["parsed"] - print("P:", parsed) + resolved_parsed = copy.deepcopy(parsed) + tokens = to_tokens(text) + while True: - lower_levels = get_lower_levels(parsed) + print("P:", resolved_parsed) + lower_levels = get_lower_levels(resolved_parsed) print("Lower:", lower_levels) if len(lower_levels) == 0: break - for atom in lower_levels: + for position, atom in lower_levels: print("\x1b[1mSelecting\x1b[0m:", atom) similar = get_similar_tree(knowledge_base, atom) print("___>", similar) - remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, text, atom, similar) - tokens, matcher, result = make_template(knowledge_base, text, atom) + remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) + _, matcher, result = make_template(knowledge_base, tokens, atom) print("Tx:", tokens) print("Mx:", matcher) print("Rx:", result) print("Remix:", remix) + after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) - print(" \\->", after_remix) + print( " +->", after_remix) + subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom) + print(r" \-> <{}>".format(subquery_type)) + + # Clean remaining tokens + new_tokens = list(tokens) + offset = len(start_bounds) + for _ in range(len(remix)): + new_tokens.pop(offset) + + # TODO: Get a specific types for... types + new_tokens.insert(offset, "".format(subquery_type)) + tokens = new_tokens + + resolved_parsed = replace_position(resolved_parsed, position, subquery_type) print("#########") - break - tokens, matcher, result = make_template(knowledge_base, text, parsed) + tokens, matcher, result = make_template(knowledge_base, tokens, parsed) print("T:", tokens) print("M:", matcher) print("R:", result) @@ -86,10 +127,11 @@ def apply_remix(tokens, remix): return rebuilt -def build_remix_matrix(knowledge_base, text, atom, similar): +def build_remix_matrix(knowledge_base, tokens, atom, similar): # print("+" * 20) - tokens, matcher, result = make_template(knowledge_base, text, atom) + tokens = list(tokens) + tokens, matcher, result = make_template(knowledge_base, tokens, atom) similar_matcher, similar_result, similar_result_resolved, _ = similar # print("NEW:") diff --git a/naive-nlu/test.py b/naive-nlu/test.py index fe5b65b..c4b3b0b 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -11,10 +11,10 @@ examples = [ "text": "is icecream cold?", "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) }, - # { - # "text": "lava is dangerous", - # "parsed": ("exists-property-with-value", 'lava', 'dangerous') - # }, + { + "text": "lava is dangerous", + "parsed": ("exists-property-with-value", 'lava', 'dangerous') + }, # { # "text": "is lava dangerous?", # "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), @@ -100,10 +100,12 @@ def main(): ) differences = knowledge.train(examples) - # print("----") - # print(differences()) - # print("----") + print("----") + print(differences()) + print("----") + + test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) # for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: # row = test['text'] # result, inferred_tree, differences = knowledge.process(row) From 42cb4cb8f196cd64fa3a075f1e5a3244a5ed86a8 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 16 May 2017 22:46:22 +0200 Subject: [PATCH 6/6] Add interpretation phase to shallow (1 level) nested structures. --- naive-nlu/knowledge_base.py | 1 + naive-nlu/parsing.py | 144 +++++++++++++++++++++++++++++++++--- naive-nlu/test.py | 16 ++-- 3 files changed, 143 insertions(+), 18 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index 16845cc..cf99bb0 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -48,6 +48,7 @@ class KnowledgeBase(object): def process(self, row): knowledge_before = copy.deepcopy(self.knowledge) + print("\x1b[7;32m> {} \x1b[0m".format(row)) tokens, decomposition, inferred_tree = parsing.get_fit(self, row) result = knowledge_evaluation.integrate_information(self.knowledge, { diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index 75ea3f5..305e4cb 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -5,7 +5,9 @@ import knowledge_evaluation import re import copy from functools import reduce +from typing import List +MAX_RECURSIONS = 10 # TODO: more flexible tokenization def to_tokens(text): @@ -105,7 +107,7 @@ def integrate_language(knowledge_base, example): new_tokens.pop(offset) # TODO: Get a specific types for... types - new_tokens.insert(offset, "".format(subquery_type)) + new_tokens.insert(offset, (subquery_type, remix)) tokens = new_tokens resolved_parsed = replace_position(resolved_parsed, position, subquery_type) @@ -243,6 +245,8 @@ def get_similar_tree(knowledge_base, atom): return sorted_possibilities[0] + +# TODO: unroll this mess def get_matching(sample, other): l = len(sample[0]) other = list(filter(lambda x: len(x[0]) == l, other)) @@ -250,12 +254,19 @@ def get_matching(sample, other): if len(other) == 0: return [] - if not isinstance(sample[0][i], str): - other = list(filter(lambda x: not isinstance(x[0][i], str) and + if isinstance(sample[0][i], dict): # Dictionaries are compared by groups + other = list(filter(lambda x: isinstance(x[0][i], dict) and len(x[0][i]['groups'] & sample[0][i]['groups']) > 0, other)) + elif isinstance(sample[0][i], tuple): # Tuples are compared by types [0] + other = list(filter(lambda x: isinstance(x[0][i], tuple) and + x[0][i][0] == sample[0][i][0], + other)) + return [sample[0][x] if isinstance(sample[0][x], str) + else + sample[0][x] if isinstance(sample[0][x], tuple) else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b, map(lambda y: y[0][x]['groups'], other))} @@ -282,15 +293,124 @@ def reprocess_language_knowledge(knowledge_base, examples): return pattern_examples -def get_fit(knowledge, row): - row = row.lower().split() - for sample, ast in knowledge.trained: - if len(sample) != len(row): - continue +def fitting_return_type(knowledge, + return_type, remixer, + input_stream, + tail_of_ouput_stream, + remaining_recursions: int): + indent = " " + " " * (MAX_RECURSIONS - remaining_recursions) - if all(map(lambda x: (not isinstance(sample[x], str) - or sample[x] == row[x]), - range(len(sample)))): - return row, sample, ast + for sample, ast in knowledge.trained: + try: + parsed_input = [] + parsed_output = [] + + remaining_input = reverse_remix(input_stream, remixer) + print(indent + "RMXin:", remaining_input) + remaining_output = copy.deepcopy(sample) + + print(indent + "S:", sample) + print(indent + "A:", ast) + print() + + while len(remaining_output) > 0: + ((input, output), + (remaining_input, remaining_output)) = match_token(knowledge, + remaining_input, + remaining_output, + remaining_recursions - 1) + parsed_input += input + parsed_output += output + print(indent + "INP:", input) + print(indent + "OUT:", output) + + print(indent + "Pi:", parsed_input) + print(indent + "Po:", parsed_output) + print("\x1b[7m", end='') + print(indent + "Ri:", remaining_input) + print(indent + "Ro:", remaining_output) + print("\x1b[0m") + return ((parsed_input, parsed_output), + (remaining_input, remaining_output + tail_of_ouput_stream)) + except TypeError as e: + print(indent + "X " + str(e)) + pass + except IndexError as e: + print(indent + "X " + str(e)) + pass + raise TypeError("No matching type found") + + +def reverse_remix(tree_section, remix): + result_section = [] + for origin in remix: + result_section.append(copy.deepcopy(tree_section[origin])) + return result_section + tree_section[len(remix):] + + +def match_token(knowledge, + input: List[str], + trained: List[str], + remaining_recursions: int): + if remaining_recursions < 1: + return None + + # print("#" * (MAX_RECURSIONS - remaining_recursions)) + # print("Input:", input) + # print("Output:", trained) + indent = " " + " " * (MAX_RECURSIONS - remaining_recursions) + first_input = input[0] + expected_first = trained[0] + print(indent + "Ex?", expected_first) + print(indent + "Fo!", first_input) + + if isinstance(expected_first, dict): + # TODO: check if the dictionary matches the values + return (([first_input], [expected_first]), (input[1:], trained[1:])) + + elif isinstance(expected_first, tuple): + return_type, remixer = expected_first + return fitting_return_type(knowledge, + return_type, remixer, + input, trained[1:], + remaining_recursions) + + elif expected_first == first_input: + return (([first_input], [expected_first]), (input[1:], trained[1:])) + + return None + + +def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS): + tokens = to_tokens(row) + indent = " " * (MAX_RECURSIONS - remaining_recursions) + for sample, ast in knowledge.trained: + print("-----") + print("TOK:", tokens) + try: + remaining_input = copy.deepcopy(tokens) + remaining_output = copy.deepcopy(sample) + print(indent + "AST:", ast) + print(indent + "S:", sample) + + # TODO: merge with get_return type, as uses the same mechanism + while len(remaining_output) > 0: + ((_, _), (remaining_input, remaining_output)) = match_token(knowledge, + remaining_input, + remaining_output, + remaining_recursions) + print(indent + "Ri:", remaining_input) + print(indent + "Ro:", remaining_output) + + if len(remaining_input) == 0 and len(remaining_input) == 0: + print("!!!", tokens, sample, ast) + return tokens, sample, ast + except TypeError as e: + print(indent + "X " + str(e)) + pass + except IndexError as e: + print(indent + "X " + str(e)) + pass + print() else: return None diff --git a/naive-nlu/test.py b/naive-nlu/test.py index c4b3b0b..ab62e73 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -1,6 +1,7 @@ import json from knowledge_base import KnowledgeBase +from modifiable_property import ModifiableProperty examples = [ { @@ -19,10 +20,10 @@ examples = [ # "text": "is lava dangerous?", # "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), # }, - # { - # "text": "earth is a planet", - # "parsed": ("pertenence-to-group", 'earth', 'planet'), - # }, + { + "text": "earth is a planet", + "parsed": ("pertenence-to-group", 'earth', 'planet'), + }, # { # "text": "is earth a moon?", # "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), @@ -91,7 +92,10 @@ def test_assumption(expectedResponse, knowledge, query): print("Expected: {}".format(expectedResponse)) result, abstract_tree, diff = knowledge.process(query['text']) - print("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if result != expectedResponse else "2", result)) + end_result = result.getter() if isinstance(result, ModifiableProperty) else result + + print("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) + assert(end_result == expectedResponse) def main(): @@ -105,6 +109,7 @@ def main(): print(differences()) print("----") + test_assumption(True, knowledge, {'text': 'earth is a planet'}) test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) # for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: # row = test['text'] @@ -119,7 +124,6 @@ def main(): # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) } # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) } - # test_assumption(True, knowledge, queryTrue) # test_assumption(False, knowledge, queryFalse) if __name__ == '__main__':