diff --git a/naive-nlu/tree_nlu/knowledge_base.py b/naive-nlu/tree_nlu/knowledge_base.py index 31c84a1..33bd9e9 100644 --- a/naive-nlu/tree_nlu/knowledge_base.py +++ b/naive-nlu/tree_nlu/knowledge_base.py @@ -22,7 +22,6 @@ class KnowledgeBase(object): knowledge_before = copy.deepcopy(self.knowledge) # Parse everything - parsed_examples = [] for example in examples: # If there's parsed data, leverage it ASAP if 'parsed' in example: diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index 524bd56..2e71712 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -1,3 +1,5 @@ +import logging + from .modifiable_property import ( ModifiableProperty, ModifiablePropertyWithAst, @@ -9,6 +11,7 @@ def resolve(knowledge_base, elements, value): if isinstance(value, int): return elements[value] elif isinstance(value, tuple) or isinstance(value, list): + print("V:", value, elements) return integrate_information(knowledge_base, { "elements": elements, "parsed": value, @@ -100,11 +103,17 @@ def exists_property_with_value(knowledge_base, elements, subj, value): def modifiable_element_for_existance_in_set(container, set_name, element): + print("-----({} {} {})".format(container, set_name, element)) + import traceback + # traceback.print_stack() + def getter(): nonlocal container, set_name, element + print(" get({} {} {})".format(container, set_name, element)) return (set_name in container) and (element in container[set_name]) def setter(): + print(" add({} {} {})".format(container, set_name, element)) nonlocal container, set_name, element return container[set_name].add(element) @@ -208,6 +217,27 @@ def property_has_value(knowledge_base, elements, subj, prop, value): element=value ) +def perform_verb_over_object(knowledge_base, elements, subj, verb, obj): + subj = resolve(knowledge_base, elements, subj) + verb = resolve(knowledge_base, elements, verb) + obj = resolve(knowledge_base, elements, obj) + logging.debug("({} {} {})".format(verb, subj, obj)) + + if subj not in knowledge_base: + knowledge_base[subj] = {'groups': set()} + + if 'performs-over' not in knowledge_base[subj]: + knowledge_base[subj]['performs-over'] = {} + + if verb not in knowledge_base[subj]['performs-over']: + knowledge_base[subj]['performs-over'][verb] = set() + + return modifiable_element_for_existance_in_set( + container=knowledge_base[subj]['performs-over'], + set_name=verb, + element=obj + ) + knowledge_ingestion = { "exists-property-with-value": exists_property_with_value, @@ -216,6 +246,7 @@ knowledge_ingestion = { "question": question, "implies": implies, "property-has-value": property_has_value, + "perform-verb-over-object": perform_verb_over_object, } @@ -234,6 +265,29 @@ def integrate_information(knowledge_base, example): args = ast[1:] elements = example.get('elements', None) + logging.debug("Integrating:") + logging.debug("AST: {}".format(ast)) + logging.debug("ARG: {}".format(elements)) + logging.debug("------------") + return tagged_with_ast( ast, elements, knowledge_ingestion[method](knowledge_base, elements, *args)) + +def can_be_used_in_place(knowledge, token, minisegment): + if token not in knowledge.knowledge: + return False + + info = knowledge.knowledge[token] + info_groups = info.get('groups', set()) + minisegment_groups = minisegment.get('groups', set()) + + # Common group + if len(info_groups & minisegment_groups) > 0: + return True + + # Neither has a group + elif len(info_groups) == 0 == len(minisegment_groups): + return True + + return False diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index 265bd59..5352812 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -20,6 +20,9 @@ def to_tokens(text): def make_template(knowledge_base, tokens, parsed): matcher = list(tokens) template = list(parsed) + logging.debug(" -- MK TEMPLATE --") + logging.debug("MATCHR: {}".format(matcher)) + logging.debug("TEMPLT: {}".format(template)) for i in range(len(matcher)): word = matcher[i] if word in template: @@ -56,6 +59,11 @@ def get_lower_levels(parsed): # TODO: probably optimize this, it creates lots of unnecessary tuples def replace_position(tree, position, new_element): + logging.debug("REPLACE POSITIONS:") + logging.debug(" TREE : {}".format(tree)) + logging.debug("POSITION: {}".format(position)) + logging.debug("NEW ELEM: {}".format(new_element)) + logging.debug("------------------") def aux(current_tree, remaining_route): if len(remaining_route) == 0: @@ -69,7 +77,9 @@ def replace_position(tree, position, new_element): + tree[step + 2:] ) - return aux(tree, position) + result = aux(tree, position) + logging.debug("-RESULT: {}".format(result)) + return result def integrate_language(knowledge_base, example): @@ -90,15 +100,23 @@ def integrate_language(knowledge_base, example): logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom)) similar = get_similar_tree(knowledge_base, atom, tokens) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) - _, matcher, result = make_template(knowledge_base, tokens, atom) - logging.debug("Tx: {}".format(tokens)) + + after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) + logging.debug("--FIND MIX--") + logging.debug("-MIX- | {}".format(remix)) + logging.debug("-FRM- | {}".format(tokens)) + logging.debug("-AFT- | {}".format(after_remix)) + + print() + + _, matcher, result = make_template(knowledge_base, after_remix, atom) + logging.debug("Tx: {}".format(after_remix)) logging.debug("Mx: {}".format(matcher)) logging.debug("Rx: {}".format(result)) - logging.debug("Remix: {}".format(remix)) logging.debug("Sx: {}".format(start_bounds)) logging.debug("Ex: {}".format(end_bounds)) - after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) + assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) logging.debug( " +-> {}".format(after_remix)) subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom) @@ -115,6 +133,8 @@ def integrate_language(knowledge_base, example): tokens = new_tokens resolved_parsed = replace_position(resolved_parsed, position, offset) + logging.debug("RP: {}".format(resolved_parsed)) + logging.debug("AT: {}".format(atom)) logging.debug("#########") @@ -277,6 +297,14 @@ def get_similar_tree(knowledge_base, atom, tokens): if len(sorted_possibilities) < 1: return None + for i, possibility in enumerate(sorted_possibilities): + logging.debug('---- POSSIBILITY #{} ----'.format(i)) + similar_matcher, similar_result, similar_result_resolved, _, _ = possibility + logging.debug('AST: {}'.format(similar_result)) + logging.debug('Based on: {}'.format(similar_matcher)) + logging.debug('Results on: {}'.format(similar_result_resolved)) + logging.debug('---------------------') + return sorted_possibilities[0] @@ -336,7 +364,7 @@ def reverse_remix(tree_section, remix): offset = 0 for origin in remix: if isinstance(origin, int): - if origin >= len(tree_section): + if (origin + offset) >= len(tree_section): return None result_section.append(copy.deepcopy(tree_section[origin + offset])) @@ -347,13 +375,18 @@ def reverse_remix(tree_section, remix): def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS): + results = [] for matcher, ast in knowledge.trained: result = match_fit(knowledge, tokens, matcher, ast, remaining_recursions) - if result is not None: - return result - return None + if result is not None: + results.append(result) + print("XXX", result) + + print(' - ' + '\n - '.join(map(str, results))) + if len(results) > 0: + return results[0] def is_definite_minisegment(minisegment): @@ -362,8 +395,7 @@ def is_definite_minisegment(minisegment): def match_token(knowledge, next_token, minisegment): if isinstance(minisegment, dict): - # TODO: check if the dictionary matches the values - return True + return knowledge_evaluation.can_be_used_in_place(knowledge, next_token, minisegment) elif isinstance(minisegment, str): # TODO: check if the two elements can be used in each other place return next_token == minisegment @@ -382,11 +414,20 @@ def resolve_fit(knowledge, fit, remaining_recursions): if remixed_tokens is None: return None + # if len(tokens) == 3 and tokens[2] == 'electricity': + # logging.debug("--UNMIX--") + # logging.debug("-MIX- | {}".format(remixer)) + # logging.debug("REMIX | {}".format(tokens)) + # logging.debug(" T O | {}".format(remixed_tokens)) + # if remixer != [0, 1, 2]: + # return None + minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1) if minifit is None: return None minitokens, miniast = minifit + logging.debug(" AST | {}".format(miniast)) subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast) fitted.append(subproperty) @@ -395,6 +436,7 @@ def resolve_fit(knowledge, fit, remaining_recursions): def match_fit(knowledge, tokens, matcher, ast, remaining_recursions): segment_possibilities = [([], tokens)] # Matched tokens, remaining tokens + indent = ' ' * (parameters.MAX_RECURSIONS - remaining_recursions) for minisegment in matcher: possibilities_after_round = [] for matched_tokens, remaining_tokens in segment_possibilities: @@ -424,6 +466,11 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions): resolved_fits = [] for fit, _ in fully_matched_segments: + print(indent + ":::", fit) # REMIXES HAVE TO BE APPLIED BEFORE!!! + + print(indent + '*' * 20) + for fit, _ in fully_matched_segments: + print(indent + ":::", fit) # REMIXES HAVE TO BE APPLIED BEFORE!!! resolved_fit = resolve_fit(knowledge, fit, remaining_recursions) if resolved_fit is not None: resolved_fits.append(resolved_fit) diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index 810e3c8..1692e9a 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -3,7 +3,7 @@ import logging from .tests import basic from .tests import gac_100 -logging.getLogger().setLevel(logging.ERROR) +logging.getLogger().setLevel(logging.DEBUG) tests = ( ("basic", basic), diff --git a/naive-nlu/tree_nlu/tests/basic.py b/naive-nlu/tree_nlu/tests/basic.py index 450e7e0..d5d959a 100644 --- a/naive-nlu/tree_nlu/tests/basic.py +++ b/naive-nlu/tree_nlu/tests/basic.py @@ -98,6 +98,12 @@ base_knowledge = { 'fly': { "groups": {'verb'}, }, + 'bus': { + "groups": {'noun'}, + }, + 'run': { + "groups": {'verb'}, + }, 'swim': { "groups": {'verb'}, }, diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index b2c31e0..daaa696 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -1,3 +1,4 @@ +import logging from ..knowledge_base import KnowledgeBase from ..utils.visuals import show_progbar @@ -85,12 +86,16 @@ examples = [ lambda knowledge: _assert('man' in knowledge.knowledge['socrates']['groups']) ),], }), - # { - # "text": "Computers use electricity?", - # "affirmation": "Computers use electricity?", - # "parsed": (), - # "answer": None, - # }, + ('full_example',{ + "text": "Computers use electricity?", + "affirmation": "Computers use electricity", + "parsed": ("question", + ('perform-verb-over-object', 'computers', 'use', 'electricity')), + "answer": True, + "after_execution": [( + lambda knowledge: print("->", knowledge.knowledge['computers']) + ),], + }), # { # "text": "The dominant language in france is french?", # "affirmation": "The dominant language in france is french?", @@ -105,7 +110,7 @@ examples = [ # }, ('text_example', { - "question": "Is milk white?", + "question": "is milk white?", "affirmation": "milk is white", "answer": True, }), @@ -658,12 +663,27 @@ base_knowledge = { 'planet': { "groups": {'noun', 'group'}, }, + 'white': { + "groups": {'noun', 'color', 'concept', 'property'}, + }, 'green': { "groups": {'noun', 'color', 'concept'}, }, + 'milk': { + "groups": {'noun'}, + }, 'fly': { "groups": {'verb'}, }, + 'computers': { + "groups": {'object'}, + }, + 'use': { + "groups": {'verb'}, + }, + 'electricity': { + "groups": {'power'}, + }, } def main(): @@ -686,24 +706,26 @@ def main(): show_progbar(i, total, data['text']) differences = knowledge.train([question]) + print(differences()) result, _, _ = knowledge.process(data['text']) - if result != data['answer']: - raise AssertionError('{} is not {}'.format(result, data['answer'])) - if "after_execution" in data: for f in data["after_execution"]: f(knowledge) + if result != data['answer']: + raise AssertionError('{} is not {}'.format(result, data['answer'])) + elif example_type == 'text_example': show_progbar(i, total, data['affirmation']) affirmation = data['affirmation'] + logging.debug("Processing affirmation: {}".format(affirmation)) + _, _, _ = knowledge.process(affirmation) show_progbar(i, total, data['question']) question = data['question'] - - _, _, _ = knowledge.process(affirmation) + logging.debug("Processing question : {}".format(question)) result, _, _ = knowledge.process(question) if result != data['answer']: