From 9d49d0068820d6144056f075521c5f15fd4d68c0 Mon Sep 17 00:00:00 2001
From: kenkeiras
Date: Sun, 28 May 2017 22:39:04 +0200
Subject: [PATCH] Work in progress.

* Test
* More debugging
* Base concept
---
 naive-nlu/tree_nlu/knowledge_evaluation.py | 15 ++++++++
 naive-nlu/tree_nlu/parsing.py              | 40 +++++++++++++++++++---
 naive-nlu/tree_nlu/test.py                 |  2 +-
 naive-nlu/tree_nlu/tests/gac_100.py        | 15 +++++---
 4 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py
index 031fd2d..bbc8ef0 100644
--- a/naive-nlu/tree_nlu/knowledge_evaluation.py
+++ b/naive-nlu/tree_nlu/knowledge_evaluation.py
@@ -1,3 +1,5 @@
+import logging
+
 from .modifiable_property import (
     ModifiableProperty,
     ModifiablePropertyWithAst,
@@ -9,6 +11,7 @@ def resolve(knowledge_base, elements, value):
     if isinstance(value, int):
         return elements[value]
     elif isinstance(value, tuple) or isinstance(value, list):
+        print("V:", value, elements)
         return integrate_information(knowledge_base, {
             "elements": elements,
             "parsed": value,
@@ -100,11 +103,17 @@ def exists_property_with_value(knowledge_base, elements, subj, value):
 
 
 def modifiable_element_for_existance_in_set(container, set_name, element):
+    print("-----({} {} {})".format(container, set_name, element))
+    import traceback
+    # traceback.print_stack()
+
     def getter():
         nonlocal container, set_name, element
+        print(" get({} {} {})".format(container, set_name, element))
         return (set_name in container) and (element in container[set_name])
 
     def setter():
+        print(" add({} {} {})".format(container, set_name, element))
         nonlocal container, set_name, element
         return container[set_name].add(element)
 
@@ -212,6 +221,7 @@ def perform_verb_over_object(knowledge_base, elements, subj, verb, obj):
     subj = resolve(knowledge_base, elements, subj)
     verb = resolve(knowledge_base, elements, verb)
     obj = resolve(knowledge_base, elements, obj)
+    logging.debug("({} {} {})".format(verb, subj, obj))
 
     if subj not in knowledge_base:
         knowledge_base[subj] = {'groups': set()}
@@ -255,6 +265,11 @@ def integrate_information(knowledge_base, example):
     args = ast[1:]
     elements = example.get('elements', None)
 
+    logging.debug("Integrating:")
+    logging.debug("AST: {}".format(ast))
+    logging.debug("ARG: {}".format(elements))
+    logging.debug("------------")
+
     return tagged_with_ast(
         ast, elements,
         knowledge_ingestion[method](knowledge_base, elements, *args))
diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py
index 265bd59..c5b71fb 100644
--- a/naive-nlu/tree_nlu/parsing.py
+++ b/naive-nlu/tree_nlu/parsing.py
@@ -20,6 +20,9 @@ def to_tokens(text):
 def make_template(knowledge_base, tokens, parsed):
     matcher = list(tokens)
     template = list(parsed)
+    logging.debug(" -- MK TEMPLATE --")
+    logging.debug("MATCHR: {}".format(matcher))
+    logging.debug("TEMPLT: {}".format(template))
     for i in range(len(matcher)):
         word = matcher[i]
         if word in template:
@@ -56,6 +59,11 @@ def get_lower_levels(parsed):
 
 # TODO: probably optimize this, it creates lots of unnecessary tuples
 def replace_position(tree, position, new_element):
+    logging.debug("REPLACE POSITIONS:")
+    logging.debug(" TREE : {}".format(tree))
+    logging.debug("POSITION: {}".format(position))
+    logging.debug("NEW ELEM: {}".format(new_element))
+    logging.debug("------------------")
 
     def aux(current_tree, remaining_route):
         if len(remaining_route) == 0:
@@ -69,7 +77,9 @@ def replace_position(tree, position, new_element):
             + tree[step + 2:]
         )
 
-    return aux(tree, position)
+    result = aux(tree, position)
+    logging.debug("-RESULT: {}".format(result))
+    return result
 
 
 def integrate_language(knowledge_base, example):
@@ -90,15 +100,23 @@ def integrate_language(knowledge_base, example):
         logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom))
         similar = get_similar_tree(knowledge_base, atom, tokens)
         remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
-        _, matcher, result = make_template(knowledge_base, tokens, atom)
-        logging.debug("Tx: {}".format(tokens))
+
+        after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
+        logging.debug("--FIND MIX--")
+        logging.debug("-MIX- | {}".format(remix))
+        logging.debug("-FRM- | {}".format(tokens))
+        logging.debug("-AFT- | {}".format(after_remix))
+
+        print()
+
+        _, matcher, result = make_template(knowledge_base, after_remix, atom)
+        logging.debug("Tx: {}".format(after_remix))
         logging.debug("Mx: {}".format(matcher))
         logging.debug("Rx: {}".format(result))
-        logging.debug("Remix: {}".format(remix))
         logging.debug("Sx: {}".format(start_bounds))
         logging.debug("Ex: {}".format(end_bounds))
 
-        after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
+        assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
         logging.debug(
             " +-> {}".format(after_remix))
         subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
@@ -115,6 +133,8 @@
 
         tokens = new_tokens
         resolved_parsed = replace_position(resolved_parsed, position, offset)
+        logging.debug("RP: {}".format(resolved_parsed))
+        logging.debug("AT: {}".format(atom))
         logging.debug("#########")
 
 
@@ -382,11 +402,20 @@ def resolve_fit(knowledge, fit, remaining_recursions):
 
         if remixed_tokens is None:
             return None
+        # if len(tokens) == 3 and tokens[2] == 'electricity':
+        # logging.debug("--UNMIX--")
+        # logging.debug("-MIX- | {}".format(remixer))
+        # logging.debug("REMIX | {}".format(tokens))
+        # logging.debug(" T O | {}".format(remixed_tokens))
+        # if remixer != [0, 1, 2]:
+        # return None
+
         minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
         if minifit is None:
             return None
 
         minitokens, miniast = minifit
+        logging.debug(" AST | {}".format(miniast))
         subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
         fitted.append(subproperty)
 
@@ -424,6 +453,7 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
 
     resolved_fits = []
     for fit, _ in fully_matched_segments:
+        print(":::", fit)
         resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
         if resolved_fit is not None:
             resolved_fits.append(resolved_fit)
diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py
index 810e3c8..1692e9a 100644
--- a/naive-nlu/tree_nlu/test.py
+++ b/naive-nlu/tree_nlu/test.py
@@ -3,7 +3,7 @@ import logging
 from .tests import basic
 from .tests import gac_100
 
-logging.getLogger().setLevel(logging.ERROR)
+logging.getLogger().setLevel(logging.DEBUG)
 
 tests = (
     ("basic", basic),
diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py
index 7dc7b60..5e331b9 100644
--- a/naive-nlu/tree_nlu/tests/gac_100.py
+++ b/naive-nlu/tree_nlu/tests/gac_100.py
@@ -91,6 +91,9 @@ examples = [
         "parsed": ("question",
                    ('perform-verb-over-object', 'computers', 'use', 'electricity')),
         "answer": True,
+        "after_execution": [(
+            lambda knowledge: print("->", knowledge.knowledge['computers'])
+        ),],
     }),
     # {
     # "text": "The dominant language in france is french?",
@@ -665,11 +668,14 @@ base_knowledge = {
     'fly': {
         "groups": {'verb'},
     },
+    'computers': {
+        "groups": {'object'},
+    },
     'use': {
         "groups": {'verb'},
    },
     'electricity': {
-        "groups": {},
+        "groups": {'power'},
     },
 }
 
@@ -693,16 +699,17 @@ def main():
 
             show_progbar(i, total, data['text'])
             differences = knowledge.train([question])
+            print(differences())
 
            result, _, _ = knowledge.process(data['text'])
 
-            if result != data['answer']:
-                raise AssertionError('{} is not {}'.format(result, data['answer']))
-
             if "after_execution" in data:
                 for f in data["after_execution"]:
                     f(knowledge)
 
+            if result != data['answer']:
+                raise AssertionError('{} is not {}'.format(result, data['answer']))
+
         elif example_type == 'text_example':
             show_progbar(i, total, data['affirmation'])
             affirmation = data['affirmation']
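
Note (illustration, not part of the patch): the behavioural core of the parsing.py change is that integrate_language now builds the matcher template from the remixed token window (after_remix) instead of the raw token list, and asserts that the window plus its start/end bounds still covers the whole sentence. The sketch below shows that ordering in isolation; apply_remix and make_template here are simplified stand-ins with an invented example sentence, not the real tree_nlu signatures.

    # Minimal sketch of the remix-then-template ordering, under the assumption
    # that apply_remix reorders a token window and make_template generalises
    # tokens shared with the parsed form into positional indices.

    def apply_remix(tokens, remix):
        # Reorder a token window according to a list of source indices.
        return [tokens[i] for i in remix]

    def make_template(tokens, parsed):
        # Replace tokens that also appear in the parsed form with their position,
        # so the resulting template generalises over the concrete words.
        matcher = list(tokens)
        template = list(parsed)
        for i, word in enumerate(matcher):
            if word in template:
                template[template.index(word)] = i
                matcher[i] = {"groups": set()}
        return matcher, template

    # Hypothetical sentence split into start/end bounds and an inner window.
    tokens = ["do", "computers", "use", "electricity", "?"]
    start_bounds, end_bounds = ["do"], ["?"]
    remix = [0, 1, 2]

    # Build the template from the remixed window, not from the raw tokens,
    # keeping the same consistency check the patch asserts.
    after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
    assert len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)
    matcher, template = make_template(after_remix, ("computers", "use", "electricity"))
    print(matcher)   # [{'groups': set()}, {'groups': set()}, {'groups': set()}]
    print(template)  # [0, 1, 2]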