From 63227c4f3eda946e43572099e32c069353641cc9 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 17 May 2017 00:27:23 +0200 Subject: [PATCH] Fix exploration of multiple options on a sub-level. --- naive-nlu/knowledge_base.py | 3 +- naive-nlu/parsing.py | 145 +++++++++++++++++++++++++----------- naive-nlu/test.py | 63 ++++++++++------ 3 files changed, 144 insertions(+), 67 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index cf99bb0..3fedac2 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -49,11 +49,10 @@ class KnowledgeBase(object): def process(self, row): knowledge_before = copy.deepcopy(self.knowledge) print("\x1b[7;32m> {} \x1b[0m".format(row)) - tokens, decomposition, inferred_tree = parsing.get_fit(self, row) + tokens, inferred_tree = parsing.get_fit(self, row) result = knowledge_evaluation.integrate_information(self.knowledge, { "elements": tokens, - "decomposition": decomposition, "parsed": inferred_tree, }) self.act_upon(result) diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index 305e4cb..43c4538 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -7,7 +7,7 @@ import copy from functools import reduce from typing import List -MAX_RECURSIONS = 10 +MAX_RECURSIONS = 5 # TODO: more flexible tokenization def to_tokens(text): @@ -155,7 +155,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar): matcher.pop(i) tokens.pop(i) - possible_remixes = get_possible_remixes(matcher, similar_matcher) + possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher) chosen_remix = possible_remixes[0] # print("New tokens:", tokens) @@ -163,15 +163,21 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar): return chosen_remix, (start_bounds, end_bounds) -def get_possible_remixes(matcher, similar_matcher): +def get_possible_remixes(knowledge_base, matcher, similar_matcher): # print("*" * 20) # print(matcher) # print(similar_matcher) matrix = [] for element in matcher: - assert(element in similar_matcher) - indexes = all_indexes(similar_matcher, element) + print("-", element) + print("+", similar_matcher) + assert(element in similar_matcher or isinstance(element, dict)) + + if isinstance(element, dict): + indexes = all_matching_indexes(knowledge_base, similar_matcher, element) + else: + indexes = all_indexes(similar_matcher, element) matrix.append(indexes) # print(matrix) @@ -192,6 +198,24 @@ def all_indexes(collection, element): return indexes +def all_matching_indexes(knowledge_base, collection, element): + indexes = [] + + assert("groups" in element) + element = element["groups"] + for i, instance in enumerate(collection): + if isinstance(instance, dict): + instance = instance["groups"] + elif instance in knowledge_base.knowledge: + instance = knowledge_base.knowledge[instance]["groups"] + + intersection = set(instance) & set(element) + if len(intersection) > 0: + indexes.append((i, intersection)) + + return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)] + + def find_bounds(matcher, similar_matcher): start_bounds = [] for i, element in enumerate(matcher): @@ -314,27 +338,33 @@ def fitting_return_type(knowledge, print() while len(remaining_output) > 0: - ((input, output), - (remaining_input, remaining_output)) = match_token(knowledge, - remaining_input, - remaining_output, - remaining_recursions - 1) - parsed_input += input - parsed_output += output - print(indent + "INP:", input) - print(indent + "OUT:", output) + for (elements, + (remaining_input, 
+ remaining_output)) in match_token(knowledge, + remaining_input, + remaining_output, + remaining_recursions - 1): + parsed_input += elements + print(indent + "Elements:", elements) + break print(indent + "Pi:", parsed_input) print(indent + "Po:", parsed_output) - print("\x1b[7m", end='') - print(indent + "Ri:", remaining_input) - print(indent + "Ro:", remaining_output) - print("\x1b[0m") - return ((parsed_input, parsed_output), - (remaining_input, remaining_output + tail_of_ouput_stream)) + print("\x1b[7m" + indent + "Ri:", + remaining_input, + "\x1b[0m]") + print("\x1b[7m" + indent + "Ro:", + remaining_output + tail_of_ouput_stream, + "\x1b[0m]") + print() + yield (parsed_input, + (remaining_input, remaining_output + tail_of_ouput_stream)) except TypeError as e: print(indent + "X " + str(e)) pass + except ValueError as e: + print(indent + "X " + str(e)) + pass except IndexError as e: print(indent + "X " + str(e)) pass @@ -353,7 +383,7 @@ def match_token(knowledge, trained: List[str], remaining_recursions: int): if remaining_recursions < 1: - return None + yield None # print("#" * (MAX_RECURSIONS - remaining_recursions)) # print("Input:", input) @@ -366,18 +396,60 @@ def match_token(knowledge, if isinstance(expected_first, dict): # TODO: check if the dictionary matches the values - return (([first_input], [expected_first]), (input[1:], trained[1:])) + yield (([first_input]), (input[1:], trained[1:])) elif isinstance(expected_first, tuple): return_type, remixer = expected_first - return fitting_return_type(knowledge, - return_type, remixer, - input, trained[1:], - remaining_recursions) + for r in fitting_return_type(knowledge, + return_type, remixer, + input, trained[1:], + remaining_recursions): + print("-->", r) + yield r elif expected_first == first_input: - return (([first_input], [expected_first]), (input[1:], trained[1:])) + yield (([first_input]), (input[1:], trained[1:])) + yield None + + +def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions): + indent = "." 
+ " " * (MAX_RECURSIONS - remaining_recursions) + try: + # TODO: merge with get_return type, as uses the same mechanism + if len(remaining_output) > 0: + for (elements, + (input_for_next_level, + output_for_next_level)) in match_token(knowledge, + remaining_input, + remaining_output, + remaining_recursions): + print("Nli:", input_for_next_level) + print("Nlo:", output_for_next_level) + print(indent + "E", elements) + try: + result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions) + print(indent + "→", result) + lower_elements, _ = result + print("<<<<< ELM:", elements, lower_elements) + return elements + lower_elements, ast + except TypeError as e: + print(indent + "X " + str(e)) + except IndexError as e: + print(indent + "X " + str(e)) + + else: + print(indent + "Ri:", remaining_input) + print(indent + "Ro:", remaining_output) + print("OK") + elif len(remaining_input) == 0 and len(remaining_input) == 0: + print("<<<<< AST:", ast) + return [], ast + + except TypeError as e: + print(indent + "X " + str(e)) + except IndexError as e: + print(indent + "X " + str(e)) return None @@ -392,25 +464,14 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS): remaining_output = copy.deepcopy(sample) print(indent + "AST:", ast) print(indent + "S:", sample) - - # TODO: merge with get_return type, as uses the same mechanism - while len(remaining_output) > 0: - ((_, _), (remaining_input, remaining_output)) = match_token(knowledge, - remaining_input, - remaining_output, - remaining_recursions) - print(indent + "Ri:", remaining_input) - print(indent + "Ro:", remaining_output) - - if len(remaining_input) == 0 and len(remaining_input) == 0: - print("!!!", tokens, sample, ast) - return tokens, sample, ast + result = get_fit_onwards(knowledge, ast, remaining_input, + remaining_output, remaining_recursions) + if result is not None: + return result except TypeError as e: print(indent + "X " + str(e)) - pass except IndexError as e: print(indent + "X " + str(e)) - pass print() else: return None diff --git a/naive-nlu/test.py b/naive-nlu/test.py index ab62e73..e27dd95 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -16,30 +16,34 @@ examples = [ "text": "lava is dangerous", "parsed": ("exists-property-with-value", 'lava', 'dangerous') }, - # { - # "text": "is lava dangerous?", - # "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), - # }, + { + "text": "is lava dangerous?", + "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), + }, { "text": "earth is a planet", "parsed": ("pertenence-to-group", 'earth', 'planet'), }, - # { - # "text": "is earth a moon?", - # "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), - # }, - # { - # "text": "Green is a color", - # "parsed": ("pertenence-to-group", 'green', 'color'), - # }, - # { - # "text": "a plane can fly", - # "parsed": ("has-capacity", 'plane', 'fly') - # }, - # { - # "text": "a wale can swim", - # "parsed": ("has-capacity", 'wale', 'swim') - # }, + { + "text": "io is a moon", + "parsed": ("pertenence-to-group", 'io', 'moon'), + }, + { + "text": "is earth a moon?", + "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), + }, + { + "text": "Green is a color", + "parsed": ("pertenence-to-group", 'green', 'color'), + }, + { + "text": "a plane can fly", + "parsed": ("has-capacity", 'plane', 'fly') + }, + { + "text": "a wale can swim", + "parsed": ("has-capacity", 'wale', 'swim') + }, ] base_knowledge = { @@ -52,6 +56,9 @@ 
base_knowledge = { 'earth': { "groups": set(['noun', 'object', 'planet']), }, + 'io': { + "groups": set(['noun', 'object']), + }, 'green': { "groups": set(['noun', 'color', 'concept']), }, @@ -75,6 +82,9 @@ base_knowledge = { 'planet': { "groups": set(['noun', 'group']), }, + 'moon': { + "groups": set(['noun', 'group']), + }, 'color': { "groups": set(['property', 'group']), }, @@ -121,10 +131,17 @@ def main(): # print('-----') # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) # print('-----') - # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) } - # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) } + queryTrue = { + "text": "is io a moon?", + "parsed": ("question", ("pertenence-to-group", "io", "moon")) + } + queryFalse = { + "text": "is io a planet?", + "parsed": ("question", ("pertenence-to-group", "io", "planet")) + } - # test_assumption(False, knowledge, queryFalse) + test_assumption(False, knowledge, queryFalse) + test_assumption(True, knowledge, queryTrue) if __name__ == '__main__': main()
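
For context, a minimal standalone sketch of the generator-plus-backtracking
pattern that this patch moves match_token / fitting_return_type /
get_fit_onwards towards: the matcher yields every way one pattern element can
consume input, and the driver recursively explores those options, falling back
to the next candidate whenever a branch cannot consume the whole sentence. The
token, pattern and knowledge shapes below are simplified assumptions for
illustration, not the project's actual structures.

    def match_token(knowledge, tokens, pattern):
        """Yield every (consumed, (remaining_tokens, remaining_pattern)) option."""
        if not pattern or not tokens:
            return
        expected = pattern[0]
        if isinstance(expected, set):
            # A group element may swallow one or more consecutive matching
            # tokens; yield the shortest option first, longer ones as fallbacks.
            consumed = []
            for tok in tokens:
                if not (knowledge.get(tok, set()) & expected):
                    break
                consumed.append(tok)
                yield (list(consumed), (tokens[len(consumed):], pattern[1:]))
        elif expected == tokens[0]:
            # Literal element: exactly one way to match it.
            yield ([tokens[0]], (tokens[1:], pattern[1:]))

    def fit_onwards(knowledge, tokens, pattern):
        if not pattern:
            # A fit only counts if the whole input was consumed.
            return [] if not tokens else None
        for elements, (rest_tokens, rest_pattern) in match_token(knowledge,
                                                                 tokens, pattern):
            lower = fit_onwards(knowledge, rest_tokens, rest_pattern)
            if lower is not None:     # this branch worked all the way down
                return elements + lower
            # otherwise loop on and try the next yielded option (backtracking)
        return None

    knowledge = {'big': {'adjective'}, 'red': {'adjective'}, 'planet': {'noun'}}
    print(fit_onwards(knowledge,
                      ['big', 'red', 'planet', 'is', 'nice'],
                      [{'adjective', 'noun'}, 'is', 'nice']))
    # -> ['big', 'red', 'planet', 'is', 'nice']: the group element first tries
    #    to stop after 'big', dead-ends on 'is' vs 'red', and backtracks until
    #    it has consumed all three words.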
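
Similarly, a small self-contained sketch of the group-intersection ranking idea
behind the new all_matching_indexes(): candidate positions are kept only when
their group set overlaps the target element's groups, and larger overlaps are
preferred. The knowledge-base shape ({token: {"groups": set(...)}}) mirrors the
patch; the function name and the example data are illustrative assumptions.

    def rank_indexes_by_group_overlap(knowledge, collection, element_groups):
        scored = []
        for i, token in enumerate(collection):
            if isinstance(token, dict):
                groups = token.get("groups", set())
            elif token in knowledge:
                groups = knowledge[token]["groups"]
            else:
                groups = set()
            overlap = groups & element_groups
            if overlap:
                scored.append((i, overlap))
        # Biggest overlap first, as in the patch's sort key.
        return [i for i, overlap in sorted(scored,
                                           key=lambda x: len(x[1]),
                                           reverse=True)]

    knowledge = {
        'earth': {"groups": {'noun', 'object', 'planet'}},
        'moon': {"groups": {'noun', 'group'}},
        'is': {"groups": {'verb'}},
    }
    print(rank_indexes_by_group_overlap(knowledge,
                                        ['is', 'earth', 'a', 'moon'],
                                        {'noun', 'object'}))
    # -> [1, 3]: 'earth' shares two groups with the target, 'moon' only one.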