From 393527590c059d804fe1a579d5dc12e01e97d1ea Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Mon, 22 May 2017 20:20:53 +0200 Subject: [PATCH] Convert the linear exploration into a more tree-like. --- naive-nlu/knowledge_base.py | 9 +- naive-nlu/knowledge_evaluation.py | 26 +++- naive-nlu/modifiable_property.py | 15 +- naive-nlu/parsing.py | 232 ++++++++++-------------------- naive-nlu/test.py | 4 +- 5 files changed, 120 insertions(+), 166 deletions(-) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index 08d51e3..7750315 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -3,7 +3,7 @@ import copy import logging import parsing import knowledge_evaluation -from modifiable_property import ModifiableProperty +from modifiable_property import is_modifiable_property def diff_knowledge(before, after): @@ -50,7 +50,8 @@ class KnowledgeBase(object): def process(self, row): knowledge_before = copy.deepcopy(self.knowledge) logging.debug("\x1b[7;32m> {} \x1b[0m".format(row)) - tokens, inferred_tree = parsing.get_fit(self, row) + tokens = parsing.to_tokens(row) + tokens, inferred_tree = parsing.get_fit(self, tokens) result = knowledge_evaluation.integrate_information(self.knowledge, { "elements": tokens, @@ -65,13 +66,13 @@ class KnowledgeBase(object): return result, inferred_tree, knowledge_diff_getter def get_value(self, result): - if isinstance(result, ModifiableProperty): + if is_modifiable_property(result): return result.getter() else: return result def act_upon(self, result): - if isinstance(result, ModifiableProperty): + if is_modifiable_property(result): result.setter() else: logging.debug(result) diff --git a/naive-nlu/knowledge_evaluation.py b/naive-nlu/knowledge_evaluation.py index 7784b05..9f5d021 100644 --- a/naive-nlu/knowledge_evaluation.py +++ b/naive-nlu/knowledge_evaluation.py @@ -1,4 +1,8 @@ -from modifiable_property import ModifiableProperty +from modifiable_property import ( + ModifiableProperty, + ModifiablePropertyWithAst, + is_modifiable_property, +) def resolve(knowledge_base, elements, value): @@ -122,7 +126,7 @@ def has_capacity(knowledge_base, elements, subj, capacity): def question(knowledge_base, elements, subj): subj = resolve(knowledge_base, elements, subj) - if isinstance(subj, ModifiableProperty): + if is_modifiable_property(subj): return subj.getter() return subj @@ -135,9 +139,21 @@ knowledge_ingestion = { } +def tagged_with_ast(ast, elements, modifiable_property): + if not isinstance(modifiable_property, ModifiableProperty): + return modifiable_property + + return ModifiablePropertyWithAst(modifiable_property.getter, + modifiable_property.setter, + ast, elements) + + def integrate_information(knowledge_base, example): - method = example['parsed'][0] - args = example['parsed'][1:] + ast = example['parsed'] + method = ast[0] + args = ast[1:] elements = example.get('elements', None) - return knowledge_ingestion[method](knowledge_base, elements, *args) + return tagged_with_ast( + ast, elements, + knowledge_ingestion[method](knowledge_base, elements, *args)) diff --git a/naive-nlu/modifiable_property.py b/naive-nlu/modifiable_property.py index 915303a..6a94525 100644 --- a/naive-nlu/modifiable_property.py +++ b/naive-nlu/modifiable_property.py @@ -1,3 +1,16 @@ import collections -ModifiableProperty = collections.namedtuple('ModifiableProperty', ['getter', 'setter']) +ModifiableProperty = collections.namedtuple('ModifiableProperty', + ['getter', 'setter']) + +ModifiablePropertyWithAst = collections.namedtuple('ModifiablePropertyWithAst', + [ + 'getter', + 'setter', + 'ast', + 'elements', + ]) + +def is_modifiable_property(element): + return (isinstance(element, ModifiableProperty) or + isinstance(element, ModifiablePropertyWithAst)) diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index 4044c9a..2133e9e 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -320,67 +320,6 @@ def reprocess_language_knowledge(knowledge_base, examples): return pattern_examples -def fitting_return_type(knowledge, - return_type, remixer, - input_stream, - tail_of_ouput_stream, - remaining_recursions: int): - indent = " " + " " * (parameters.MAX_RECURSIONS - remaining_recursions) - - for sample, ast in knowledge.trained: - try: - parsed_input = [] - parsed_output = [] - - remaining_input = reverse_remix(input_stream, remixer) - logging.debug(indent + "RMXin:", remaining_input) - remaining_output = copy.deepcopy(sample) - - logging.debug(indent + "S:", sample) - logging.debug(indent + "A:", ast) - logging.debug("---") - - while len(remaining_output) > 0: - for (elements, - (remaining_input, - remaining_output)) in match_token(knowledge, - remaining_input, - remaining_output, - remaining_recursions - 1): - parsed_input += elements - logging.debug(indent + "Elements:", elements) - break - - logging.debug(indent + "Pi:", parsed_input) - logging.debug(indent + "Po:", parsed_output) - logging.debug("\x1b[7m" + indent + "Ri:", - remaining_input, - "\x1b[0m") - logging.debug("\x1b[7m" + indent + "Ro:", - remaining_output + tail_of_ouput_stream, - "\x1b[0m") - logging.debug("---") - resolved_input = knowledge_evaluation.resolve(knowledge.knowledge, - parsed_input, ast) - if isinstance(resolved_input, ModifiableProperty): - resolved_input = resolved_input.getter() - yield ([resolved_input], - (remaining_input, remaining_output + tail_of_ouput_stream)) - except TypeError as e: - logging.debug(indent + "X " + str(e)) - pass - except ValueError as e: - logging.debug(indent + "X " + str(e)) - pass - except IndexError as e: - logging.debug(indent + "X " + str(e)) - pass - except KeyError as e: - logging.debug(indent + "X " + str(e)) - pass - raise TypeError("No matching type found") - - def reverse_remix(tree_section, remix): result_section = [] for origin in remix: @@ -388,101 +327,86 @@ def reverse_remix(tree_section, remix): return result_section + tree_section[len(remix):] -def match_token(knowledge, - input: List[str], - trained: List[str], - remaining_recursions: int): - if remaining_recursions < 1: - yield None +def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS): + for matcher, ast in knowledge.trained: + result = match_fit(knowledge, tokens, matcher, ast, + remaining_recursions) + if result is not None: + return result - # logging.debug("#" * (parameters.MAX_RECURSIONS - remaining_recursions)) - # logging.debug("Input:", input) - # logging.debug("Output:", trained) - depth_meter.show_depth(parameters.MAX_RECURSIONS - remaining_recursions) - indent = " " + " " * (parameters.MAX_RECURSIONS - remaining_recursions) - first_input = input[0] - expected_first = trained[0] - logging.debug(indent + "Ex?", expected_first) - logging.debug(indent + "Fo!", first_input) - - if isinstance(expected_first, dict): - # TODO: check if the dictionary matches the values - yield (([first_input]), (input[1:], trained[1:])) - - elif isinstance(expected_first, tuple): - return_type, remixer = expected_first - for r in fitting_return_type(knowledge, - return_type, remixer, - input, trained[1:], - remaining_recursions): - logging.debug("-->", r) - yield r - - elif expected_first == first_input: - yield (([first_input]), (input[1:], trained[1:])) - - yield None - - -def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions): - indent = "." + " " * (parameters.MAX_RECURSIONS - remaining_recursions) - try: - # TODO: merge with get_return type, as uses the same mechanism - if len(remaining_output) > 0: - for (elements, - (input_for_next_level, - output_for_next_level)) in match_token(knowledge, - remaining_input, - remaining_output, - remaining_recursions): - logging.debug("Nli:", input_for_next_level) - logging.debug("Nlo:", output_for_next_level) - logging.debug(indent + "E", elements) - try: - result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions) - logging.debug(indent + "→", result) - lower_elements, _ = result - logging.debug("<<<<< ELM:", elements, lower_elements) - return elements + lower_elements, ast - except TypeError as e: - logging.debug(indent + "X " + str(e)) - except IndexError as e: - logging.debug(indent + "X " + str(e)) - - else: - logging.debug(indent + "Ri:", remaining_input) - logging.debug(indent + "Ro:", remaining_output) - logging.debug("OK") - elif len(remaining_input) == 0 and len(remaining_input) == 0: - logging.debug("<<<<< AST:", ast) - return [], ast - - except TypeError as e: - logging.debug(indent + "X " + str(e)) - except IndexError as e: - logging.debug(indent + "X " + str(e)) return None -def get_fit(knowledge, row, remaining_recursions=parameters.MAX_RECURSIONS): - tokens = to_tokens(row) - indent = " " * (parameters.MAX_RECURSIONS - remaining_recursions) - for sample, ast in knowledge.trained: - logging.debug("-----") - logging.debug("TOK:", tokens) - try: - remaining_input = copy.deepcopy(tokens) - remaining_output = copy.deepcopy(sample) - logging.debug(indent + "AST:", ast) - logging.debug(indent + "S:", sample) - result = get_fit_onwards(knowledge, ast, remaining_input, - remaining_output, remaining_recursions) - if result is not None: - return result - except TypeError as e: - logging.debug(indent + "X " + str(e)) - except IndexError as e: - logging.debug(indent + "X " + str(e)) - logging.debug("---") - else: +def is_definite_minisegment(minisegment): + return isinstance(minisegment, str) or isinstance(minisegment, dict) + + +def match_token(knowledge, next_token, minisegment): + if isinstance(minisegment, dict): + # TODO: check if the dictionary matches the values + return True + elif isinstance(minisegment, str): + # TODO: check if the two elements can be used in each other place + return next_token == minisegment + + return False + + +def resolve_fit(knowledge, fit, remaining_recursions): + fitted = [] + for element in fit: + if is_definite_minisegment(element): + fitted.append(element) + else: + ((result_type, remixer), tokens) = element + remixed_tokens = reverse_remix(tokens, remixer) + minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1) + if minifit is None: + return None + + minitokens, miniast = minifit + subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast) + fitted.append(subproperty) + + return fitted + + +def match_fit(knowledge, tokens, matcher, ast, remaining_recursions): + segment_possibilities = [([], tokens)] # Matched tokens, remaining tokens + for minisegment in matcher: + possibilities_after_round = [] + for matched_tokens, remaining_tokens in segment_possibilities: + if len(remaining_tokens) < 1: + continue + + if is_definite_minisegment(minisegment): + if match_token(knowledge, remaining_tokens[0], minisegment): + possibilities_after_round.append(( + matched_tokens + [remaining_tokens[0]], + remaining_tokens[1:] + )) + else: + # TODO: optimize this with a look ahead + for i in range(1, len(tokens)): + possibilities_after_round.append(( + matched_tokens + [(minisegment, remaining_tokens[:i])], + remaining_tokens[i:] + )) + else: + segment_possibilities = possibilities_after_round + + fully_matched_segments = [(matched, remaining) + for (matched, remaining) + in segment_possibilities + if len(remaining) == 0] + + resolved_fits = [] + for fit, _ in fully_matched_segments: + resolved_fit = resolve_fit(knowledge, fit, remaining_recursions) + if resolved_fit is not None: + resolved_fits.append(resolved_fit) + + if len(resolved_fits) == 0: return None + + return resolved_fits[0], ast diff --git a/naive-nlu/test.py b/naive-nlu/test.py index c213591..d6692d9 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -2,7 +2,7 @@ import json import logging from knowledge_base import KnowledgeBase -from modifiable_property import ModifiableProperty +from modifiable_property import is_modifiable_property examples = [ { @@ -103,7 +103,7 @@ def test_assumption(expectedResponse, knowledge, query): logging.info("Expected: {}".format(expectedResponse)) result, abstract_tree, diff = knowledge.process(query['text']) - end_result = result.getter() if isinstance(result, ModifiableProperty) else result + end_result = result.getter() if is_modifiable_property(result) else result logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) assert(end_result == expectedResponse)