Also use matching tokens to score tree similarity.

This commit is contained in:
kenkeiras 2017-05-24 22:09:26 +02:00
parent 75d690120b
commit e6cbb54382

View File

@@ -88,7 +88,7 @@ def integrate_language(knowledge_base, example):
for position, atom in lower_levels: for position, atom in lower_levels:
logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom)) logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom))
similar = get_similar_tree(knowledge_base, atom) similar = get_similar_tree(knowledge_base, atom, tokens)
remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
_, matcher, result = make_template(knowledge_base, tokens, atom) _, matcher, result = make_template(knowledge_base, tokens, atom)
logging.debug("Tx: {}".format(tokens)) logging.debug("Tx: {}".format(tokens))
@@ -138,7 +138,7 @@ def apply_remix(tokens, remix):
def build_remix_matrix(knowledge_base, tokens, atom, similar): def build_remix_matrix(knowledge_base, tokens, atom, similar):
tokens = list(tokens) tokens = list(tokens)
tokens, matcher, result = make_template(knowledge_base, tokens, atom) tokens, matcher, result = make_template(knowledge_base, tokens, atom)
similar_matcher, similar_result, similar_result_resolved, _ = similar similar_matcher, similar_result, similar_result_resolved, _, _ = similar
start_bounds, end_bounds = find_bounds(matcher, similar_matcher) start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
@@ -219,7 +219,7 @@ def find_bounds(matcher, similar_matcher):
return start_bounds, end_bounds return start_bounds, end_bounds
def get_similar_tree(knowledge_base, atom): def get_similar_tree(knowledge_base, atom, tokens):
possibilities = [] possibilities = []
# Find matching possibilities # Find matching possibilities
@@ -243,12 +243,17 @@ def get_similar_tree(knowledge_base, atom):
raw)) raw))
# TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element
score = sum([resolved[i] == atom[i] atom_score = sum([resolved[i] == atom[i]
for i for i
in range(min(len(resolved), in range(min(len(resolved),
len(atom)))]) len(atom)))])
sorted_possibilities.append((raw, possibility, resolved, score)) token_score = sum([similar_token in tokens
sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True) for similar_token
in raw])
sorted_possibilities.append((raw, possibility, resolved, atom_score, token_score))
sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3] * 100 + p[4], reverse=True)
if len(sorted_possibilities) < 1: if len(sorted_possibilities) < 1:
return None return None