Fix exploration of multiple options on a sub-level.

kenkeiras 2017-05-17 00:27:23 +02:00
parent ce7b50e023
commit 63227c4f3e
3 changed files with 144 additions and 67 deletions
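
The core of the change is that the parser's match_token and fitting_return_type now behave as generators (yield) instead of returning a single result, so the new get_fit_onwards can try every candidate match at a sub-level and fall back when one of them dead-ends. A minimal, self-contained sketch of that backtracking pattern; the names and data below are illustrative, not the project's API:

# Illustrative sketch of generator-driven backtracking (not project code):
# each matcher yields every compatible option instead of returning only the
# first one, so a dead end at a lower level no longer aborts the whole parse.

def candidate_matches(token, options):
    # Hypothetical matcher: yield every option that could stand for `token`.
    for option in options:
        if option.startswith(token):
            yield option


def explore(tokens, options, used=frozenset()):
    # Assign each token to a distinct option, backtracking through candidates.
    if not tokens:
        return []
    first, rest = tokens[0], tokens[1:]
    for candidate in candidate_matches(first, options):
        if candidate in used:
            continue
        tail = explore(rest, options, used | {candidate})
        if tail is not None:      # this sub-level worked out
            return [candidate] + tail
    return None                   # every option at this level failed


if __name__ == '__main__':
    # "mo" first grabs "molecule", which leaves nothing for "mol";
    # the generator lets us back up and try "moon" instead.
    print(explore(["mo", "mol"], ["molecule", "moon"]))   # ['moon', 'molecule']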

View File

@@ -49,11 +49,10 @@ class KnowledgeBase(object):
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
         print("\x1b[7;32m> {} \x1b[0m".format(row))
-        tokens, decomposition, inferred_tree = parsing.get_fit(self, row)
+        tokens, inferred_tree = parsing.get_fit(self, row)
         result = knowledge_evaluation.integrate_information(self.knowledge,
                                                             {
                                                                 "elements": tokens,
-                                                                "decomposition": decomposition,
                                                                 "parsed": inferred_tree,
                                                             })
         self.act_upon(result)
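
With "decomposition" gone, process unpacks two values from parsing.get_fit and passes only "elements" and "parsed" downstream. A hedged sketch of the new call contract; the None guard is an assumption (get_fit can return None when no training sample fits), not something this commit adds:

# Sketch of the two-value contract; the None guard is an assumption.
import parsing                # project module providing get_fit
import knowledge_evaluation   # project module providing integrate_information


def process_row(knowledge_base, row):
    fit = parsing.get_fit(knowledge_base, row)
    if fit is None:
        return None                      # no registered structure matched
    tokens, inferred_tree = fit          # "decomposition" is no longer returned
    return knowledge_evaluation.integrate_information(
        knowledge_base.knowledge,
        {
            "elements": tokens,
            "parsed": inferred_tree,
        })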

View File

@@ -7,7 +7,7 @@ import copy
 from functools import reduce
 from typing import List
 
-MAX_RECURSIONS = 10
+MAX_RECURSIONS = 5
 
 # TODO: more flexible tokenization
 def to_tokens(text):
@@ -155,7 +155,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
         matcher.pop(i)
         tokens.pop(i)
 
-    possible_remixes = get_possible_remixes(matcher, similar_matcher)
+    possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
     chosen_remix = possible_remixes[0]
 
     # print("New tokens:", tokens)
@@ -163,15 +163,21 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
     return chosen_remix, (start_bounds, end_bounds)
 
 
-def get_possible_remixes(matcher, similar_matcher):
+def get_possible_remixes(knowledge_base, matcher, similar_matcher):
     # print("*" * 20)
     # print(matcher)
     # print(similar_matcher)
 
     matrix = []
     for element in matcher:
-        assert(element in similar_matcher)
-        indexes = all_indexes(similar_matcher, element)
+        print("-", element)
+        print("+", similar_matcher)
+        assert(element in similar_matcher or isinstance(element, dict))
+
+        if isinstance(element, dict):
+            indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
+        else:
+            indexes = all_indexes(similar_matcher, element)
         matrix.append(indexes)
 
     # print(matrix)
@@ -192,6 +198,24 @@ def all_indexes(collection, element):
     return indexes
 
 
+def all_matching_indexes(knowledge_base, collection, element):
+    indexes = []
+
+    assert("groups" in element)
+    element = element["groups"]
+
+    for i, instance in enumerate(collection):
+        if isinstance(instance, dict):
+            instance = instance["groups"]
+        elif instance in knowledge_base.knowledge:
+            instance = knowledge_base.knowledge[instance]["groups"]
+        intersection = set(instance) & set(element)
+        if len(intersection) > 0:
+            indexes.append((i, intersection))
+
+    return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
+
+
 def find_bounds(matcher, similar_matcher):
     start_bounds = []
     for i, element in enumerate(matcher):
@@ -314,27 +338,33 @@ def fitting_return_type(knowledge,
 
             print()
             while len(remaining_output) > 0:
-                ((input, output),
-                 (remaining_input, remaining_output)) = match_token(knowledge,
-                                                                    remaining_input,
-                                                                    remaining_output,
-                                                                    remaining_recursions - 1)
-                parsed_input += input
-                parsed_output += output
-                print(indent + "INP:", input)
-                print(indent + "OUT:", output)
+                for (elements,
+                     (remaining_input,
+                      remaining_output)) in match_token(knowledge,
+                                                        remaining_input,
+                                                        remaining_output,
+                                                        remaining_recursions - 1):
+                    parsed_input += elements
+                    print(indent + "Elements:", elements)
+                    break
 
             print(indent + "Pi:", parsed_input)
             print(indent + "Po:", parsed_output)
-            print("\x1b[7m", end='')
-            print(indent + "Ri:", remaining_input)
-            print(indent + "Ro:", remaining_output)
-            print("\x1b[0m")
-            return ((parsed_input, parsed_output),
-                    (remaining_input, remaining_output + tail_of_ouput_stream))
+            print("\x1b[7m" + indent + "Ri:",
+                  remaining_input,
+                  "\x1b[0m]")
+            print("\x1b[7m" + indent + "Ro:",
+                  remaining_output + tail_of_ouput_stream,
+                  "\x1b[0m]")
+            print()
+            yield (parsed_input,
+                   (remaining_input, remaining_output + tail_of_ouput_stream))
         except TypeError as e:
             print(indent + "X " + str(e))
             pass
+        except ValueError as e:
+            print(indent + "X " + str(e))
+            pass
         except IndexError as e:
             print(indent + "X " + str(e))
             pass
@@ -353,7 +383,7 @@ def match_token(knowledge,
                 trained: List[str],
                 remaining_recursions: int):
     if remaining_recursions < 1:
-        return None
+        yield None
 
     # print("#" * (MAX_RECURSIONS - remaining_recursions))
     # print("Input:", input)
@@ -366,18 +396,60 @@ def match_token(knowledge,
 
     if isinstance(expected_first, dict):
         # TODO: check if the dictionary matches the values
-        return (([first_input], [expected_first]), (input[1:], trained[1:]))
+        yield (([first_input]), (input[1:], trained[1:]))
 
     elif isinstance(expected_first, tuple):
         return_type, remixer = expected_first
-        return fitting_return_type(knowledge,
-                                   return_type, remixer,
-                                   input, trained[1:],
-                                   remaining_recursions)
+        for r in fitting_return_type(knowledge,
+                                     return_type, remixer,
+                                     input, trained[1:],
+                                     remaining_recursions):
+            print("-->", r)
+            yield r
 
     elif expected_first == first_input:
-        return (([first_input], [expected_first]), (input[1:], trained[1:]))
+        yield (([first_input]), (input[1:], trained[1:]))
+
+    yield None
+
+
+def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions):
+    indent = "." + " " * (MAX_RECURSIONS - remaining_recursions)
+    try:
+        # TODO: merge with get_return type, as uses the same mechanism
+        if len(remaining_output) > 0:
+            for (elements,
+                 (input_for_next_level,
+                  output_for_next_level)) in match_token(knowledge,
+                                                         remaining_input,
+                                                         remaining_output,
+                                                         remaining_recursions):
+                print("Nli:", input_for_next_level)
+                print("Nlo:", output_for_next_level)
+                print(indent + "E", elements)
+
+                try:
+                    result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
+                    print(indent + "", result)
+                    lower_elements, _ = result
+                    print("<<<<< ELM:", elements, lower_elements)
+                    return elements + lower_elements, ast
+                except TypeError as e:
+                    print(indent + "X " + str(e))
+                except IndexError as e:
+                    print(indent + "X " + str(e))
+            else:
+                print(indent + "Ri:", remaining_input)
+                print(indent + "Ro:", remaining_output)
+                print("OK")
+        elif len(remaining_input) == 0 and len(remaining_input) == 0:
+            print("<<<<< AST:", ast)
+            return [], ast
+    except TypeError as e:
+        print(indent + "X " + str(e))
+    except IndexError as e:
+        print(indent + "X " + str(e))
 
     return None
@@ -392,25 +464,14 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS):
             remaining_output = copy.deepcopy(sample)
             print(indent + "AST:", ast)
             print(indent + "S:", sample)
-
-            # TODO: merge with get_return type, as uses the same mechanism
-            while len(remaining_output) > 0:
-                ((_, _), (remaining_input, remaining_output)) = match_token(knowledge,
-                                                                            remaining_input,
-                                                                            remaining_output,
-                                                                            remaining_recursions)
-                print(indent + "Ri:", remaining_input)
-                print(indent + "Ro:", remaining_output)
-
-            if len(remaining_input) == 0 and len(remaining_input) == 0:
-                print("!!!", tokens, sample, ast)
-                return tokens, sample, ast
+            result = get_fit_onwards(knowledge, ast, remaining_input,
+                                     remaining_output, remaining_recursions)
+            if result is not None:
+                return result
         except TypeError as e:
             print(indent + "X " + str(e))
-            pass
         except IndexError as e:
             print(indent + "X " + str(e))
-            pass
         print()
     else:
         return None
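
The new all_matching_indexes helper is what lets a dict element (one carrying a "groups" set) match candidates by shared group membership, ranked by overlap size. A standalone sketch of that idea with data mirroring the test knowledge; this is an illustration, not the module itself, and the skip of unknown tokens is a sketch-only choice:

# Standalone illustration of group-intersection ranking (not the module itself).
knowledge = {
    'earth': {"groups": {'noun', 'object', 'planet'}},
    'io':    {"groups": {'noun', 'object'}},
    'moon':  {"groups": {'noun', 'group'}},
}


def matching_indexes(knowledge, collection, element):
    # Return indexes of `collection` whose groups overlap element["groups"],
    # largest overlap first (same sort key as all_matching_indexes).
    wanted = set(element["groups"])
    scored = []
    for i, instance in enumerate(collection):
        if isinstance(instance, dict):
            groups = set(instance["groups"])
        elif instance in knowledge:
            groups = set(knowledge[instance]["groups"])
        else:
            continue                      # unknown token: nothing to compare
        overlap = groups & wanted
        if overlap:
            scored.append((i, overlap))
    return [i for i, overlap in sorted(scored, key=lambda s: len(s[1]), reverse=True)]


print(matching_indexes(knowledge, ['earth', 'io', 'moon'],
                       {"groups": {'noun', 'object', 'planet'}}))
# -> [0, 1, 2]: 'earth' shares three groups, 'io' two, 'moon' one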

View File

@@ -16,30 +16,34 @@ examples = [
         "text": "lava is dangerous",
         "parsed": ("exists-property-with-value", 'lava', 'dangerous')
     },
-    # {
-    #     "text": "is lava dangerous?",
-    #     "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
-    # },
+    {
+        "text": "is lava dangerous?",
+        "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
+    },
     {
         "text": "earth is a planet",
         "parsed": ("pertenence-to-group", 'earth', 'planet'),
     },
-    # {
-    #     "text": "is earth a moon?",
-    #     "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
-    # },
-    # {
-    #     "text": "Green is a color",
-    #     "parsed": ("pertenence-to-group", 'green', 'color'),
-    # },
-    # {
-    #     "text": "a plane can fly",
-    #     "parsed": ("has-capacity", 'plane', 'fly')
-    # },
-    # {
-    #     "text": "a wale can swim",
-    #     "parsed": ("has-capacity", 'wale', 'swim')
-    # },
+    {
+        "text": "io is a moon",
+        "parsed": ("pertenence-to-group", 'io', 'moon'),
+    },
+    {
+        "text": "is earth a moon?",
+        "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
+    },
+    {
+        "text": "Green is a color",
+        "parsed": ("pertenence-to-group", 'green', 'color'),
+    },
+    {
+        "text": "a plane can fly",
+        "parsed": ("has-capacity", 'plane', 'fly')
+    },
+    {
+        "text": "a wale can swim",
+        "parsed": ("has-capacity", 'wale', 'swim')
+    },
 ]
@@ -52,6 +56,9 @@ base_knowledge = {
     'earth': {
         "groups": set(['noun', 'object', 'planet']),
     },
+    'io': {
+        "groups": set(['noun', 'object']),
+    },
     'green': {
         "groups": set(['noun', 'color', 'concept']),
     },
@@ -75,6 +82,9 @@ base_knowledge = {
     'planet': {
         "groups": set(['noun', 'group']),
    },
+    'moon': {
+        "groups": set(['noun', 'group']),
+    },
     'color': {
         "groups": set(['property', 'group']),
     },
@@ -121,10 +131,17 @@ def main():
     # print('-----')
     # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
     # print('-----')
-    # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) }
-    # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) }
+    queryTrue = {
+        "text": "is io a moon?",
+        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
+    }
+    queryFalse = {
+        "text": "is io a planet?",
+        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
+    }
 
-    # test_assumption(False, knowledge, queryFalse)
+    test_assumption(False, knowledge, queryFalse)
+    test_assumption(True, knowledge, queryTrue)
 
 if __name__ == '__main__':
     main()
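
The new 'io' and 'moon' knowledge entries, together with the "io is a moon" example, are what allow queryTrue to be answered positively and queryFalse negatively. A rough sketch of the group-membership reasoning those two queries rely on; how the project actually stores a learned "pertenence-to-group" fact is an assumption here, not taken from the commit:

# Rough sketch only: storing a learned "pertenence-to-group" fact as a group
# tag on the member is an assumption, not the project's implementation.
base = {
    'io':   {"groups": {'noun', 'object'}},
    'moon': {"groups": {'noun', 'group'}},
}


def learn_membership(knowledge, member, group):
    # Hypothetical effect of training on "io is a moon".
    knowledge[member]["groups"].add(group)


def is_in_group(knowledge, member, group):
    return group in knowledge[member]["groups"]


learn_membership(base, 'io', 'moon')
print(is_in_group(base, 'io', 'moon'))    # True  -> what queryTrue expects
print(is_in_group(base, 'io', 'planet'))  # False -> what queryFalse expects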