Fix exploration of multiple options on a sub-level.

2017-05-17 00:27:23 +02:00 · 2017-05-17 00:27:23 +02:00 · 63227c4f3e
commit 63227c4f3e
parent ce7b50e023
3 changed files with 144 additions and 67 deletions
--- a/naive-nlu/knowledge_base.py
+++ b/naive-nlu/knowledge_base.py
@@ -49,11 +49,10 @@ class KnowledgeBase(object):
    def process(self, row):
        knowledge_before = copy.deepcopy(self.knowledge)
        print("\x1b[7;32m> {} \x1b[0m".format(row))
-        tokens, decomposition, inferred_tree = parsing.get_fit(self, row)
+        tokens, inferred_tree = parsing.get_fit(self, row)
        result = knowledge_evaluation.integrate_information(self.knowledge,
                                                          {
                                                              "elements": tokens,
-                                                              "decomposition": decomposition,
                                                              "parsed": inferred_tree,
                                                          })
        self.act_upon(result)
--- a/naive-nlu/parsing.py
+++ b/naive-nlu/parsing.py
@@ -7,7 +7,7 @@ import copy
 from functools import reduce
 from typing import List

-MAX_RECURSIONS = 10
+MAX_RECURSIONS = 5

 # TODO: more flexible tokenization
 def to_tokens(text):
@@ -155,7 +155,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
        matcher.pop(i)
        tokens.pop(i)

-    possible_remixes = get_possible_remixes(matcher, similar_matcher)
+    possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
    chosen_remix = possible_remixes[0]

    # print("New tokens:", tokens)
@@ -163,14 +163,20 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
    return chosen_remix, (start_bounds, end_bounds)


-def get_possible_remixes(matcher, similar_matcher):
+def get_possible_remixes(knowledge_base, matcher, similar_matcher):
    # print("*" * 20)
    # print(matcher)
    # print(similar_matcher)

    matrix = []
    for element in matcher:
-        assert(element in similar_matcher)
+        print("-", element)
+        print("+", similar_matcher)
+        assert(element in similar_matcher or isinstance(element, dict))
+
+        if isinstance(element, dict):
+            indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
+        else:
            indexes = all_indexes(similar_matcher, element)
        matrix.append(indexes)

@@ -192,6 +198,24 @@ def all_indexes(collection, element):
    return indexes


+def all_matching_indexes(knowledge_base, collection, element):
+    indexes = []
+
+    assert("groups" in element)
+    element = element["groups"]
+    for i, instance in enumerate(collection):
+        if isinstance(instance, dict):
+            instance = instance["groups"]
+        elif instance in knowledge_base.knowledge:
+            instance = knowledge_base.knowledge[instance]["groups"]
+
+        intersection = set(instance) & set(element)
+        if len(intersection) > 0:
+            indexes.append((i, intersection))
+
+    return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
+
+
 def find_bounds(matcher, similar_matcher):
    start_bounds = []
    for i, element in enumerate(matcher):
@@ -314,27 +338,33 @@ def fitting_return_type(knowledge,
            print()

            while len(remaining_output) > 0:
-                ((input, output),
-                 (remaining_input, remaining_output)) = match_token(knowledge,
+                for (elements,
+                     (remaining_input,
+                      remaining_output)) in match_token(knowledge,
                                                        remaining_input,
                                                        remaining_output,
-                                                                    remaining_recursions - 1)
-                parsed_input += input
-                parsed_output += output
-                print(indent + "INP:", input)
-                print(indent + "OUT:", output)
+                                                        remaining_recursions - 1):
+                    parsed_input += elements
+                    print(indent + "Elements:", elements)
+                    break

            print(indent + "Pi:", parsed_input)
            print(indent + "Po:", parsed_output)
-            print("\x1b[7m", end='')
-            print(indent + "Ri:", remaining_input)
-            print(indent + "Ro:", remaining_output)
-            print("\x1b[0m")
-            return ((parsed_input, parsed_output),
+            print("\x1b[7m" + indent + "Ri:",
+                  remaining_input,
+                  "\x1b[0m]")
+            print("\x1b[7m" + indent + "Ro:",
+                  remaining_output + tail_of_ouput_stream,
+                  "\x1b[0m]")
+            print()
+            yield (parsed_input,
                   (remaining_input, remaining_output + tail_of_ouput_stream))
        except TypeError as e:
            print(indent + "X    " + str(e))
            pass
+        except ValueError as e:
+            print(indent + "X    " + str(e))
+            pass
        except IndexError as e:
            print(indent + "X    " + str(e))
            pass
@@ -353,7 +383,7 @@ def match_token(knowledge,
                trained: List[str],
                remaining_recursions: int):
    if remaining_recursions < 1:
-        return None
+        yield None

    # print("#" * (MAX_RECURSIONS - remaining_recursions))
    # print("Input:", input)
@@ -366,18 +396,60 @@ def match_token(knowledge,

    if isinstance(expected_first, dict):
        # TODO: check if the dictionary matches the values
-        return (([first_input], [expected_first]), (input[1:], trained[1:]))
+        yield (([first_input]), (input[1:], trained[1:]))

    elif isinstance(expected_first, tuple):
        return_type, remixer = expected_first
-        return fitting_return_type(knowledge,
+        for r in fitting_return_type(knowledge,
                                     return_type, remixer,
                                     input, trained[1:],
-                                   remaining_recursions)
+                                     remaining_recursions):
+            print("-->", r)
+            yield r

    elif expected_first == first_input:
-        return (([first_input], [expected_first]), (input[1:], trained[1:]))
+        yield (([first_input]), (input[1:], trained[1:]))

+    yield None
+
+
+def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions):
+    indent = "." + "  " * (MAX_RECURSIONS - remaining_recursions)
+    try:
+        # TODO: merge with get_return type, as uses the same mechanism
+        if len(remaining_output) > 0:
+            for (elements,
+                 (input_for_next_level,
+                  output_for_next_level)) in match_token(knowledge,
+                                                         remaining_input,
+                                                         remaining_output,
+                                                         remaining_recursions):
+                print("Nli:", input_for_next_level)
+                print("Nlo:", output_for_next_level)
+                print(indent + "E", elements)
+                try:
+                    result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
+                    print(indent + "→", result)
+                    lower_elements, _ = result
+                    print("<<<<< ELM:", elements, lower_elements)
+                    return elements + lower_elements, ast
+                except TypeError as e:
+                    print(indent + "X    " + str(e))
+                except IndexError as e:
+                    print(indent + "X    " + str(e))
+
+            else:
+                print(indent + "Ri:", remaining_input)
+                print(indent + "Ro:", remaining_output)
+                print("OK")
+        elif len(remaining_input) == 0 and len(remaining_input) == 0:
+            print("<<<<< AST:", ast)
+            return [], ast
+
+    except TypeError as e:
+        print(indent + "X    " + str(e))
+    except IndexError as e:
+        print(indent + "X    " + str(e))
    return None


@@ -392,25 +464,14 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS):
            remaining_output = copy.deepcopy(sample)
            print(indent + "AST:", ast)
            print(indent + "S:", sample)
-
-            # TODO: merge with get_return type, as uses the same mechanism
-            while len(remaining_output) > 0:
-                ((_, _), (remaining_input, remaining_output)) = match_token(knowledge,
-                                                                            remaining_input,
-                                                                            remaining_output,
-                                                                            remaining_recursions)
-                print(indent + "Ri:", remaining_input)
-                print(indent + "Ro:", remaining_output)
-
-            if len(remaining_input) == 0 and len(remaining_input) == 0:
-                print("!!!", tokens, sample, ast)
-                return tokens, sample, ast
+            result = get_fit_onwards(knowledge, ast, remaining_input,
+                                     remaining_output, remaining_recursions)
+            if result is not None:
+                return result
        except TypeError as e:
            print(indent + "X    " + str(e))
-            pass
        except IndexError as e:
            print(indent + "X    " + str(e))
-            pass
        print()
    else:
        return None
--- a/naive-nlu/test.py
+++ b/naive-nlu/test.py
@@ -16,30 +16,34 @@ examples = [
        "text": "lava is dangerous",
        "parsed": ("exists-property-with-value", 'lava', 'dangerous')
    },
-    # {
-    #     "text": "is lava dangerous?",
-    #     "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
-    # },
+    {
+        "text": "is lava dangerous?",
+        "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
+    },
    {
        "text": "earth is a planet",
        "parsed": ("pertenence-to-group", 'earth', 'planet'),
    },
-    # {
-    #     "text": "is earth a moon?",
-    #     "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
-    # },
-    # {
-    #     "text": "Green is a color",
-    #     "parsed": ("pertenence-to-group", 'green', 'color'),
-    # },
-    # {
-    #     "text": "a plane can fly",
-    #     "parsed": ("has-capacity", 'plane', 'fly')
-    # },
-    # {
-    #     "text": "a wale can swim",
-    #     "parsed": ("has-capacity", 'wale', 'swim')
-    # },
+    {
+        "text": "io is a moon",
+        "parsed": ("pertenence-to-group", 'io', 'moon'),
+    },
+    {
+        "text": "is earth a moon?",
+        "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
+    },
+    {
+        "text": "Green is a color",
+        "parsed": ("pertenence-to-group", 'green', 'color'),
+    },
+    {
+        "text": "a plane can fly",
+        "parsed": ("has-capacity", 'plane', 'fly')
+    },
+    {
+        "text": "a wale can swim",
+        "parsed": ("has-capacity", 'wale', 'swim')
+    },
 ]

 base_knowledge = {
@@ -52,6 +56,9 @@ base_knowledge = {
    'earth': {
        "groups": set(['noun', 'object', 'planet']),
    },
+    'io': {
+        "groups": set(['noun', 'object']),
+    },
    'green': {
        "groups": set(['noun', 'color', 'concept']),
    },
@@ -75,6 +82,9 @@ base_knowledge = {
    'planet': {
        "groups": set(['noun', 'group']),
    },
+    'moon': {
+        "groups": set(['noun', 'group']),
+    },
    'color': {
        "groups": set(['property', 'group']),
    },
@@ -121,10 +131,17 @@ def main():
    # print('-----')
    # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
    # print('-----')
-    # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) }
-    # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) }
+    queryTrue = {
+        "text": "is io a moon?",
+        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
+    }
+    queryFalse = {
+        "text": "is io a planet?",
+        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
+    }

-    # test_assumption(False, knowledge, queryFalse)
+    test_assumption(False, knowledge, queryFalse)
+    test_assumption(True, knowledge, queryTrue)

 if __name__ == '__main__':
    main()