Convert the linear exploration into a more tree-like one.

2017-05-22 20:20:53 +02:00 · 2017-05-22 20:20:53 +02:00 · 393527590c
commit 393527590c
parent ae8d717344
5 changed files with 120 additions and 166 deletions
--- a/naive-nlu/parsing.py
+++ b/naive-nlu/parsing.py
@ -320,67 +320,6 @@ def reprocess_language_knowledge(knowledge_base, examples):
    return pattern_examples


-def fitting_return_type(knowledge,
-                        return_type, remixer,
-                        input_stream,
-                        tail_of_ouput_stream,
-                        remaining_recursions: int):
-    indent = "  " + "  " * (parameters.MAX_RECURSIONS - remaining_recursions)
-
-    for sample, ast in knowledge.trained:
-        try:
-            parsed_input = []
-            parsed_output = []
-
-            remaining_input = reverse_remix(input_stream, remixer)
-            logging.debug(indent + "RMXin:", remaining_input)
-            remaining_output = copy.deepcopy(sample)
-
-            logging.debug(indent + "S:", sample)
-            logging.debug(indent + "A:", ast)
-            logging.debug("---")
-
-            while len(remaining_output) > 0:
-                for (elements,
-                     (remaining_input,
-                      remaining_output)) in match_token(knowledge,
-                                                        remaining_input,
-                                                        remaining_output,
-                                                        remaining_recursions - 1):
-                    parsed_input += elements
-                    logging.debug(indent + "Elements:", elements)
-                    break
-
-            logging.debug(indent + "Pi:", parsed_input)
-            logging.debug(indent + "Po:", parsed_output)
-            logging.debug("\x1b[7m" + indent + "Ri:",
-                  remaining_input,
-                  "\x1b[0m")
-            logging.debug("\x1b[7m" + indent + "Ro:",
-                  remaining_output + tail_of_ouput_stream,
-                  "\x1b[0m")
-            logging.debug("---")
-            resolved_input = knowledge_evaluation.resolve(knowledge.knowledge,
-                                                          parsed_input, ast)
-            if isinstance(resolved_input, ModifiableProperty):
-                resolved_input = resolved_input.getter()
-            yield ([resolved_input],
-                   (remaining_input, remaining_output + tail_of_ouput_stream))
-        except TypeError as e:
-            logging.debug(indent + "X    " + str(e))
-            pass
-        except ValueError as e:
-            logging.debug(indent + "X    " + str(e))
-            pass
-        except IndexError as e:
-            logging.debug(indent + "X    " + str(e))
-            pass
-        except KeyError as e:
-            logging.debug(indent + "X    " + str(e))
-            pass
-    raise TypeError("No matching type found")
-
-
 def reverse_remix(tree_section, remix):
    result_section = []
    for origin in remix:
@ -388,101 +327,86 @@ def reverse_remix(tree_section, remix):
    return result_section + tree_section[len(remix):]


-def match_token(knowledge,
-                input: List[str],
-                trained: List[str],
-                remaining_recursions: int):
-    if remaining_recursions < 1:
-        yield None
+def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
+    for matcher, ast in knowledge.trained:
+        result = match_fit(knowledge, tokens, matcher, ast,
+                           remaining_recursions)
+        if result is not None:
+            return result

-    # logging.debug("#" * (parameters.MAX_RECURSIONS - remaining_recursions))
-    # logging.debug("Input:", input)
-    # logging.debug("Output:", trained)
-    depth_meter.show_depth(parameters.MAX_RECURSIONS - remaining_recursions)
-    indent = " " + "  " * (parameters.MAX_RECURSIONS - remaining_recursions)
-    first_input = input[0]
-    expected_first = trained[0]
-    logging.debug(indent + "Ex?", expected_first)
-    logging.debug(indent + "Fo!", first_input)
-
-    if isinstance(expected_first, dict):
-        # TODO: check if the dictionary matches the values
-        yield (([first_input]), (input[1:], trained[1:]))
-
-    elif isinstance(expected_first, tuple):
-        return_type, remixer = expected_first
-        for r in fitting_return_type(knowledge,
-                                     return_type, remixer,
-                                     input, trained[1:],
-                                     remaining_recursions):
-            logging.debug("-->", r)
-            yield r
-
-    elif expected_first == first_input:
-        yield (([first_input]), (input[1:], trained[1:]))
-
-    yield None
-
-
-def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions):
-    indent = "." + "  " * (parameters.MAX_RECURSIONS - remaining_recursions)
-    try:
-        # TODO: merge with get_return type, as uses the same mechanism
-        if len(remaining_output) > 0:
-            for (elements,
-                 (input_for_next_level,
-                  output_for_next_level)) in match_token(knowledge,
-                                                         remaining_input,
-                                                         remaining_output,
-                                                         remaining_recursions):
-                logging.debug("Nli:", input_for_next_level)
-                logging.debug("Nlo:", output_for_next_level)
-                logging.debug(indent + "E", elements)
-                try:
-                    result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
-                    logging.debug(indent + "→", result)
-                    lower_elements, _ = result
-                    logging.debug("<<<<< ELM:", elements, lower_elements)
-                    return elements + lower_elements, ast
-                except TypeError as e:
-                    logging.debug(indent + "X    " + str(e))
-                except IndexError as e:
-                    logging.debug(indent + "X    " + str(e))
-
-            else:
-                logging.debug(indent + "Ri:", remaining_input)
-                logging.debug(indent + "Ro:", remaining_output)
-                logging.debug("OK")
-        elif len(remaining_input) == 0 and len(remaining_input) == 0:
-            logging.debug("<<<<< AST:", ast)
-            return [], ast
-
-    except TypeError as e:
-        logging.debug(indent + "X    " + str(e))
-    except IndexError as e:
-        logging.debug(indent + "X    " + str(e))
    return None


-def get_fit(knowledge, row, remaining_recursions=parameters.MAX_RECURSIONS):
-    tokens = to_tokens(row)
-    indent = "  " * (parameters.MAX_RECURSIONS - remaining_recursions)
-    for sample, ast in knowledge.trained:
-        logging.debug("-----")
-        logging.debug("TOK:", tokens)
-        try:
-            remaining_input = copy.deepcopy(tokens)
-            remaining_output = copy.deepcopy(sample)
-            logging.debug(indent + "AST:", ast)
-            logging.debug(indent + "S:", sample)
-            result = get_fit_onwards(knowledge, ast, remaining_input,
-                                     remaining_output, remaining_recursions)
-            if result is not None:
-                return result
-        except TypeError as e:
-            logging.debug(indent + "X    " + str(e))
-        except IndexError as e:
-            logging.debug(indent + "X    " + str(e))
-        logging.debug("---")
-    else:
+def is_definite_minisegment(minisegment):
+    """Return True when the matcher element is a plain string or a dict.
+
+    Any other kind of element yields False.
+    """
+    return isinstance(minisegment, (str, dict))
+
+
+def match_token(knowledge, next_token, minisegment):
+    """Check whether one token satisfies one matcher element.
+
+    `knowledge` is accepted but not consulted yet. A dict element currently
+    accepts any token, a string element must be equal to the token, and any
+    other kind of element never matches.
+    """
+    # TODO: check if the dictionary matches the values
+    if isinstance(minisegment, dict):
+        return True
+
+    # TODO: check if the two elements can be used in each other place
+    if isinstance(minisegment, str):
+        return next_token == minisegment
+
+    return False
+
+
+def resolve_fit(knowledge, fit, remaining_recursions):
+    """Turn a candidate fit into a list of concrete elements.
+
+    Definite elements (strings and dicts) are copied through unchanged.
+    Every other element is unpacked as a
+    ``((result_type, remixer), tokens)`` pair: its tokens go through
+    `reverse_remix`, are fitted again with `get_fit` using one recursion
+    less, and the fit found is evaluated with
+    `knowledge_evaluation.resolve`.
+
+    Returns the list of resolved elements, or None as soon as one nested
+    segment has no fit.
+    """
+    resolved = []
+    for piece in fit:
+        if is_definite_minisegment(piece):
+            resolved.append(piece)
+            continue
+
+        # The declared result type is unpacked but not used here.
+        (_result_type, remixer), segment_tokens = piece
+        nested_fit = get_fit(knowledge,
+                             reverse_remix(segment_tokens, remixer),
+                             remaining_recursions - 1)
+        if nested_fit is None:
+            return None
+
+        nested_tokens, nested_ast = nested_fit
+        resolved.append(knowledge_evaluation.resolve(knowledge.knowledge,
+                                                     nested_tokens,
+                                                     nested_ast))
+
+    return resolved
+
+
+def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
+    """Try to fit `tokens` against one trained `matcher`.
+
+    The matcher is walked element by element while a list of candidate
+    partial matches ``(matched_so_far, remaining_tokens)`` is kept.
+    A definite element (string or dict) consumes exactly one token when
+    `match_token` accepts it. Any other element is a variable-length
+    segment, so one candidate is created per possible length.
+
+    Candidates that consumed every token are then resolved with
+    `resolve_fit`.
+
+    Returns ``(resolved_fit, ast)`` for the first candidate that resolves,
+    or None when no candidate does.
+    """
+    segment_possibilities = [([], tokens)]  # Matched tokens, remaining tokens
+    for minisegment in matcher:
+        possibilities_after_round = []
+        for matched_tokens, remaining_tokens in segment_possibilities:
+            if len(remaining_tokens) < 1:
+                # Matcher elements are left but tokens are not: dead end.
+                continue
+
+            if is_definite_minisegment(minisegment):
+                if match_token(knowledge, remaining_tokens[0], minisegment):
+                    possibilities_after_round.append((
+                        matched_tokens + [remaining_tokens[0]],
+                        remaining_tokens[1:]
+                    ))
+            else:
+                # TODO: optimize this with a look ahead
+                # A variable segment takes from one token up to everything
+                # that remains, but never the whole of `tokens`. Bounding
+                # the length by the remaining tokens keeps the same split
+                # from being generated (and later resolved) several times.
+                longest = min(len(tokens) - 1, len(remaining_tokens))
+                for i in range(1, longest + 1):
+                    possibilities_after_round.append((
+                        matched_tokens + [(minisegment, remaining_tokens[:i])],
+                        remaining_tokens[i:]
+                    ))
+
+        segment_possibilities = possibilities_after_round
+        if not segment_possibilities:
+            # No candidate survived this element, so nothing can match.
+            return None
+
+    # Only candidates that used up every token count as a match.
+    fully_matched_segments = [matched
+                              for (matched, remaining)
+                              in segment_possibilities
+                              if len(remaining) == 0]
+
+    resolved_fits = []
+    for fit in fully_matched_segments:
+        resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
+        if resolved_fit is not None:
+            resolved_fits.append(resolved_fit)
+
+    if len(resolved_fits) == 0:
        return None
+
+    return resolved_fits[0], ast