From 63227c4f3eda946e43572099e32c069353641cc9 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 17 May 2017 00:27:23 +0200 Subject: [PATCH] Fix exploration of multiple options on a sub-level. --- naive-nlu/knowledge_base.py | 3 +- naive-nlu/parsing.py | 145 +++++++++++++++++++++++++----------- naive-nlu/test.py | 63 ++++++++++------ 3 files changed, 144 insertions(+), 67 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index cf99bb0..3fedac2 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -49,11 +49,10 @@ class KnowledgeBase(object): def process(self, row): knowledge_before = copy.deepcopy(self.knowledge) print("\x1b[7;32m> {} \x1b[0m".format(row)) - tokens, decomposition, inferred_tree = parsing.get_fit(self, row) + tokens, inferred_tree = parsing.get_fit(self, row) result = knowledge_evaluation.integrate_information(self.knowledge, { "elements": tokens, - "decomposition": decomposition, "parsed": inferred_tree, }) self.act_upon(result) diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index 305e4cb..43c4538 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -7,7 +7,7 @@ import copy from functools import reduce from typing import List -MAX_RECURSIONS = 10 +MAX_RECURSIONS = 5 # TODO: more flexible tokenization def to_tokens(text): @@ -155,7 +155,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar): matcher.pop(i) tokens.pop(i) - possible_remixes = get_possible_remixes(matcher, similar_matcher) + possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher) chosen_remix = possible_remixes[0] # print("New tokens:", tokens) @@ -163,15 +163,21 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar): return chosen_remix, (start_bounds, end_bounds) -def get_possible_remixes(matcher, similar_matcher): +def get_possible_remixes(knowledge_base, matcher, similar_matcher): # print("*" * 20) # print(matcher) # print(similar_matcher) matrix = [] for element in matcher: - assert(element in similar_matcher) - indexes = all_indexes(similar_matcher, element) + print("-", element) + print("+", similar_matcher) + assert(element in similar_matcher or isinstance(element, dict)) + + if isinstance(element, dict): + indexes = all_matching_indexes(knowledge_base, similar_matcher, element) + else: + indexes = all_indexes(similar_matcher, element) matrix.append(indexes) # print(matrix) @@ -192,6 +198,24 @@ def all_indexes(collection, element): return indexes +def all_matching_indexes(knowledge_base, collection, element): + indexes = [] + + assert("groups" in element) + element = element["groups"] + for i, instance in enumerate(collection): + if isinstance(instance, dict): + instance = instance["groups"] + elif instance in knowledge_base.knowledge: + instance = knowledge_base.knowledge[instance]["groups"] + + intersection = set(instance) & set(element) + if len(intersection) > 0: + indexes.append((i, intersection)) + + return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)] + + def find_bounds(matcher, similar_matcher): start_bounds = [] for i, element in enumerate(matcher): @@ -314,27 +338,33 @@ def fitting_return_type(knowledge, print() while len(remaining_output) > 0: - ((input, output), - (remaining_input, remaining_output)) = match_token(knowledge, - remaining_input, - remaining_output, - remaining_recursions - 1) - parsed_input += input - parsed_output += output - print(indent + "INP:", input) - print(indent + "OUT:", output) + for (elements, + (remaining_input, 
+ remaining_output)) in match_token(knowledge, + remaining_input, + remaining_output, + remaining_recursions - 1): + parsed_input += elements + print(indent + "Elements:", elements) + break print(indent + "Pi:", parsed_input) print(indent + "Po:", parsed_output) - print("\x1b[7m", end='') - print(indent + "Ri:", remaining_input) - print(indent + "Ro:", remaining_output) - print("\x1b[0m") - return ((parsed_input, parsed_output), - (remaining_input, remaining_output + tail_of_ouput_stream)) + print("\x1b[7m" + indent + "Ri:", + remaining_input, + "\x1b[0m]") + print("\x1b[7m" + indent + "Ro:", + remaining_output + tail_of_ouput_stream, + "\x1b[0m]") + print() + yield (parsed_input, + (remaining_input, remaining_output + tail_of_ouput_stream)) except TypeError as e: print(indent + "X " + str(e)) pass + except ValueError as e: + print(indent + "X " + str(e)) + pass except IndexError as e: print(indent + "X " + str(e)) pass @@ -353,7 +383,7 @@ def match_token(knowledge, trained: List[str], remaining_recursions: int): if remaining_recursions < 1: - return None + yield None # print("#" * (MAX_RECURSIONS - remaining_recursions)) # print("Input:", input) @@ -366,18 +396,60 @@ def match_token(knowledge, if isinstance(expected_first, dict): # TODO: check if the dictionary matches the values - return (([first_input], [expected_first]), (input[1:], trained[1:])) + yield (([first_input]), (input[1:], trained[1:])) elif isinstance(expected_first, tuple): return_type, remixer = expected_first - return fitting_return_type(knowledge, - return_type, remixer, - input, trained[1:], - remaining_recursions) + for r in fitting_return_type(knowledge, + return_type, remixer, + input, trained[1:], + remaining_recursions): + print("-->", r) + yield r elif expected_first == first_input: - return (([first_input], [expected_first]), (input[1:], trained[1:])) + yield (([first_input]), (input[1:], trained[1:])) + yield None + + +def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions): + indent = "." 
+ " " * (MAX_RECURSIONS - remaining_recursions) + try: + # TODO: merge with get_return type, as uses the same mechanism + if len(remaining_output) > 0: + for (elements, + (input_for_next_level, + output_for_next_level)) in match_token(knowledge, + remaining_input, + remaining_output, + remaining_recursions): + print("Nli:", input_for_next_level) + print("Nlo:", output_for_next_level) + print(indent + "E", elements) + try: + result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions) + print(indent + "→", result) + lower_elements, _ = result + print("<<<<< ELM:", elements, lower_elements) + return elements + lower_elements, ast + except TypeError as e: + print(indent + "X " + str(e)) + except IndexError as e: + print(indent + "X " + str(e)) + + else: + print(indent + "Ri:", remaining_input) + print(indent + "Ro:", remaining_output) + print("OK") + elif len(remaining_input) == 0 and len(remaining_input) == 0: + print("<<<<< AST:", ast) + return [], ast + + except TypeError as e: + print(indent + "X " + str(e)) + except IndexError as e: + print(indent + "X " + str(e)) return None @@ -392,25 +464,14 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS): remaining_output = copy.deepcopy(sample) print(indent + "AST:", ast) print(indent + "S:", sample) - - # TODO: merge with get_return type, as uses the same mechanism - while len(remaining_output) > 0: - ((_, _), (remaining_input, remaining_output)) = match_token(knowledge, - remaining_input, - remaining_output, - remaining_recursions) - print(indent + "Ri:", remaining_input) - print(indent + "Ro:", remaining_output) - - if len(remaining_input) == 0 and len(remaining_input) == 0: - print("!!!", tokens, sample, ast) - return tokens, sample, ast + result = get_fit_onwards(knowledge, ast, remaining_input, + remaining_output, remaining_recursions) + if result is not None: + return result except TypeError as e: print(indent + "X " + str(e)) - pass except IndexError as e: print(indent + "X " + str(e)) - pass print() else: return None diff --git a/naive-nlu/test.py b/naive-nlu/test.py index ab62e73..e27dd95 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -16,30 +16,34 @@ examples = [ "text": "lava is dangerous", "parsed": ("exists-property-with-value", 'lava', 'dangerous') }, - # { - # "text": "is lava dangerous?", - # "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), - # }, + { + "text": "is lava dangerous?", + "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), + }, { "text": "earth is a planet", "parsed": ("pertenence-to-group", 'earth', 'planet'), }, - # { - # "text": "is earth a moon?", - # "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), - # }, - # { - # "text": "Green is a color", - # "parsed": ("pertenence-to-group", 'green', 'color'), - # }, - # { - # "text": "a plane can fly", - # "parsed": ("has-capacity", 'plane', 'fly') - # }, - # { - # "text": "a wale can swim", - # "parsed": ("has-capacity", 'wale', 'swim') - # }, + { + "text": "io is a moon", + "parsed": ("pertenence-to-group", 'io', 'moon'), + }, + { + "text": "is earth a moon?", + "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), + }, + { + "text": "Green is a color", + "parsed": ("pertenence-to-group", 'green', 'color'), + }, + { + "text": "a plane can fly", + "parsed": ("has-capacity", 'plane', 'fly') + }, + { + "text": "a wale can swim", + "parsed": ("has-capacity", 'wale', 'swim') + }, ] base_knowledge = { @@ -52,6 +56,9 @@ 
base_knowledge = { 'earth': { "groups": set(['noun', 'object', 'planet']), }, + 'io': { + "groups": set(['noun', 'object']), + }, 'green': { "groups": set(['noun', 'color', 'concept']), }, @@ -75,6 +82,9 @@ base_knowledge = { 'planet': { "groups": set(['noun', 'group']), }, + 'moon': { + "groups": set(['noun', 'group']), + }, 'color': { "groups": set(['property', 'group']), }, @@ -121,10 +131,17 @@ def main(): # print('-----') # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) # print('-----') - # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) } - # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) } + queryTrue = { + "text": "is io a moon?", + "parsed": ("question", ("pertenence-to-group", "io", "moon")) + } + queryFalse = { + "text": "is io a planet?", + "parsed": ("question", ("pertenence-to-group", "io", "planet")) + } - # test_assumption(False, knowledge, queryFalse) + test_assumption(False, knowledge, queryFalse) + test_assumption(True, knowledge, queryTrue) if __name__ == '__main__': main()
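
For context, a minimal standalone sketch of the generator-plus-backtracking
pattern that this patch moves match_token / fitting_return_type /
get_fit_onwards towards: the matcher yields every way one pattern element can
consume input, and the driver recursively explores those options, falling back
to the next candidate whenever a branch cannot consume the whole sentence. The
token, pattern and knowledge shapes below are simplified assumptions for
illustration, not the project's actual structures.

    def match_token(knowledge, tokens, pattern):
        """Yield every (consumed, (remaining_tokens, remaining_pattern)) option."""
        if not pattern or not tokens:
            return
        expected = pattern[0]
        if isinstance(expected, set):
            # A group element may swallow one or more consecutive matching
            # tokens; yield the shortest option first, longer ones as fallbacks.
            consumed = []
            for tok in tokens:
                if not (knowledge.get(tok, set()) & expected):
                    break
                consumed.append(tok)
                yield (list(consumed), (tokens[len(consumed):], pattern[1:]))
        elif expected == tokens[0]:
            # Literal element: exactly one way to match it.
            yield ([tokens[0]], (tokens[1:], pattern[1:]))

    def fit_onwards(knowledge, tokens, pattern):
        if not pattern:
            # A fit only counts if the whole input was consumed.
            return [] if not tokens else None
        for elements, (rest_tokens, rest_pattern) in match_token(knowledge,
                                                                 tokens, pattern):
            lower = fit_onwards(knowledge, rest_tokens, rest_pattern)
            if lower is not None:     # this branch worked all the way down
                return elements + lower
            # otherwise loop on and try the next yielded option (backtracking)
        return None

    knowledge = {'big': {'adjective'}, 'red': {'adjective'}, 'planet': {'noun'}}
    print(fit_onwards(knowledge,
                      ['big', 'red', 'planet', 'is', 'nice'],
                      [{'adjective', 'noun'}, 'is', 'nice']))
    # -> ['big', 'red', 'planet', 'is', 'nice']: the group element first tries
    #    to stop after 'big', dead-ends on 'is' vs 'red', and backtracks until
    #    it has consumed all three words.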
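
Similarly, a small self-contained sketch of the group-intersection ranking idea
behind the new all_matching_indexes(): candidate positions are kept only when
their group set overlaps the target element's groups, and larger overlaps are
preferred. The knowledge-base shape ({token: {"groups": set(...)}}) mirrors the
patch; the function name and the example data are illustrative assumptions.

    def rank_indexes_by_group_overlap(knowledge, collection, element_groups):
        scored = []
        for i, token in enumerate(collection):
            if isinstance(token, dict):
                groups = token.get("groups", set())
            elif token in knowledge:
                groups = knowledge[token]["groups"]
            else:
                groups = set()
            overlap = groups & element_groups
            if overlap:
                scored.append((i, overlap))
        # Biggest overlap first, as in the patch's sort key.
        return [i for i, overlap in sorted(scored,
                                           key=lambda x: len(x[1]),
                                           reverse=True)]

    knowledge = {
        'earth': {"groups": {'noun', 'object', 'planet'}},
        'moon': {"groups": {'noun', 'group'}},
        'is': {"groups": {'verb'}},
    }
    print(rank_indexes_by_group_overlap(knowledge,
                                        ['is', 'earth', 'a', 'moon'],
                                        {'noun', 'object'}))
    # -> [1, 3]: 'earth' shares two groups with the target, 'moon' only one.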