Lean on knowledge too when defining bounds.

2017-05-24 22:15:56 +02:00 · 2017-05-24 22:15:56 +02:00 · 89b281fd6f
commit 89b281fd6f
parent 7cdf8a310d
1 changed files with 23 additions and 5 deletions
--- a/naive-nlu/tree_nlu/parsing.py
+++ b/naive-nlu/tree_nlu/parsing.py
@ -8,7 +8,7 @@ import re
 import copy

 from functools import reduce
-from typing import List
+from typing import List, Dict
 from .modifiable_property import ModifiableProperty
 from . import parameters

@ -95,6 +95,8 @@ def integrate_language(knowledge_base, example):
            logging.debug("Mx: {}".format(matcher))
            logging.debug("Rx: {}".format(result))
            logging.debug("Remix: {}".format(remix))
+            logging.debug("Sx: {}".format(start_bounds))
+            logging.debug("Ex: {}".format(end_bounds))

            after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
            assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
@ -140,7 +142,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
    tokens, matcher, result = make_template(knowledge_base, tokens, atom)
    similar_matcher, similar_result, similar_result_resolved, _, _ = similar

-    start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
+    start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)

    for i, element in (end_bounds + start_bounds[::-1]):
        matcher.pop(i)
@ -195,13 +197,21 @@ def all_matching_indexes(knowledge_base, collection, element):
            instance = knowledge_base.knowledge[instance]["groups"]

        intersection = set(instance) & set(element)
-        if len(intersection) > 0:
+        if (len(intersection) > 0 or (0 == len(instance) == len(element))):
            indexes.append((i, intersection))

    return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]


-def find_bounds(matcher, similar_matcher):
+def element_matches_groups(knowledge, element: Dict, groups):
+    if isinstance(groups, str) and groups in knowledge:  # token name: use the groups recorded for it
+        return len(knowledge[groups].get("groups", set()) & element['groups']) > 0
+    elif isinstance(groups, dict):  # dict entry: compare the groups it carries against the element's
+        return len(groups.get("groups", set()) & element['groups']) > 0
+    return False
+
+
+def find_bounds(knowledge, matcher, similar_matcher):
    start_bounds = []
    for i, element in enumerate(matcher):
        if element in similar_matcher:
@ -211,7 +221,15 @@ def find_bounds(matcher, similar_matcher):

    end_bounds = []
    for i, element in enumerate(matcher[::-1]):
-        if element in similar_matcher:
+        in_similar = False
+        if isinstance(element, str):
+            in_similar = element in similar_matcher
+        elif isinstance(element, dict):
+            in_similar = any(map(lambda groups: element_matches_groups(knowledge.knowledge,
+                                                                       element, groups),
+                                 similar_matcher))
+
+        if in_similar:
            break
        else:
            end_bounds.append((len(matcher) - (i + 1), element))