Lean on knowledge too when defining bounds.

This commit is contained in:
kenkeiras 2017-05-24 22:15:56 +02:00
parent 7cdf8a310d
commit 89b281fd6f

View File

@ -8,7 +8,7 @@ import re
import copy
from functools import reduce
from typing import List
from typing import List, Dict
from .modifiable_property import ModifiableProperty
from . import parameters
@ -95,6 +95,8 @@ def integrate_language(knowledge_base, example):
logging.debug("Mx: {}".format(matcher))
logging.debug("Rx: {}".format(result))
logging.debug("Remix: {}".format(remix))
logging.debug("Sx: {}".format(start_bounds))
logging.debug("Ex: {}".format(end_bounds))
after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
@ -140,7 +142,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
tokens, matcher, result = make_template(knowledge_base, tokens, atom)
similar_matcher, similar_result, similar_result_resolved, _, _ = similar
start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)
for i, element in (end_bounds + start_bounds[::-1]):
matcher.pop(i)
@ -195,13 +197,21 @@ def all_matching_indexes(knowledge_base, collection, element):
instance = knowledge_base.knowledge[instance]["groups"]
intersection = set(instance) & set(element)
if len(intersection) > 0:
if (len(intersection) > 0 or (0 == len(instance) == len(element))):
indexes.append((i, intersection))
return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
def find_bounds(matcher, similar_matcher):
def element_matches_groups(knowledge, element: Dict, groups):
    """Tell whether `element` shares at least one group with `groups`.

    Args:
        knowledge: Mapping from token name to its knowledge entry; an entry
            may carry a "groups" collection.
        element: Matcher element (a dict) whose "groups" are being tested.
        groups: Either a token name, which is looked up in `knowledge`, or
            a dict that carries its own "groups" collection.

    Returns:
        True when the groups of `element` and the groups described by
        `groups` have at least one member in common, False otherwise
        (including when `groups` is an unknown name or an unsupported type).
    """
    # Normalise to a set so list-valued group collections work as well.
    element_groups = set(element.get("groups", ()))

    if isinstance(groups, str) and groups in knowledge:
        # The knowledge entry is keyed by the token name (`groups`), not by
        # the element being tested, which is an unhashable dict.
        candidate_groups = knowledge[groups].get("groups", ())
    elif isinstance(groups, dict):
        # Compare against the other element's groups, not the element's own.
        candidate_groups = groups.get("groups", ())
    else:
        return False

    return not element_groups.isdisjoint(candidate_groups)
def find_bounds(knowledge, matcher, similar_matcher):
start_bounds = []
for i, element in enumerate(matcher):
if element in similar_matcher:
@ -211,7 +221,15 @@ def find_bounds(matcher, similar_matcher):
end_bounds = []
for i, element in enumerate(matcher[::-1]):
if element in similar_matcher:
in_similar = False
if isinstance(element, str):
in_similar = element in similar_matcher
elif isinstance(element, dict):
in_similar = any(map(lambda groups: element_matches_groups(knowledge.knowledge,
element, groups),
similar_matcher))
if in_similar:
break
else:
end_bounds.append((len(matcher) - (i + 1), element))