diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index 6e1fe30..ed5903a 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -88,7 +88,7 @@ def integrate_language(knowledge_base, example): for position, atom in lower_levels: logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom)) - similar = get_similar_tree(knowledge_base, atom) + similar = get_similar_tree(knowledge_base, atom, tokens) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) _, matcher, result = make_template(knowledge_base, tokens, atom) logging.debug("Tx: {}".format(tokens)) @@ -138,7 +138,7 @@ def apply_remix(tokens, remix): def build_remix_matrix(knowledge_base, tokens, atom, similar): tokens = list(tokens) tokens, matcher, result = make_template(knowledge_base, tokens, atom) - similar_matcher, similar_result, similar_result_resolved, _ = similar + similar_matcher, similar_result, similar_result_resolved, _, _ = similar start_bounds, end_bounds = find_bounds(matcher, similar_matcher) @@ -219,7 +219,7 @@ def find_bounds(matcher, similar_matcher): return start_bounds, end_bounds -def get_similar_tree(knowledge_base, atom): +def get_similar_tree(knowledge_base, atom, tokens): possibilities = [] # Find matching possibilities @@ -243,12 +243,17 @@ def get_similar_tree(knowledge_base, atom): raw)) # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element - score = sum([resolved[i] == atom[i] + atom_score = sum([resolved[i] == atom[i] for i in range(min(len(resolved), len(atom)))]) - sorted_possibilities.append((raw, possibility, resolved, score)) - sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True) + token_score = sum([similar_token in tokens + for similar_token + in raw]) + + sorted_possibilities.append((raw, possibility, resolved, atom_score, token_score)) + + sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3] * 100 + p[4], reverse=True) if len(sorted_possibilities) < 1: return None