From 359f858c394ef14ca17ec3086e75afb9b0832be4 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Sat, 30 Sep 2017 01:32:04 +0200 Subject: [PATCH] Add session files base concept. --- .gitignore | 1 + naive-nlu/tree_nlu/cli.py | 12 +++ naive-nlu/tree_nlu/knowledge_base.py | 13 +-- naive-nlu/tree_nlu/knowledge_evaluation.py | 20 ++-- naive-nlu/tree_nlu/parsing.py | 104 ++++++++++----------- naive-nlu/tree_nlu/session/org_mode.py | 45 +++++++++ naive-nlu/tree_nlu/test.py | 10 ++ naive-nlu/tree_nlu/tests/basic.py | 26 +++--- naive-nlu/tree_nlu/tests/gac_100.py | 24 ++--- 9 files changed, 162 insertions(+), 93 deletions(-) create mode 100644 naive-nlu/tree_nlu/session/org_mode.py diff --git a/.gitignore b/.gitignore index 3c698f6..474c6f3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.ba?k *.pyc __pycache__ +treeNLU-*session-*.org diff --git a/naive-nlu/tree_nlu/cli.py b/naive-nlu/tree_nlu/cli.py index 73a0a80..82e55ea 100644 --- a/naive-nlu/tree_nlu/cli.py +++ b/naive-nlu/tree_nlu/cli.py @@ -1,4 +1,9 @@ import logging +import datetime +from .session.org_mode import ( + global_session as session, + create_global_session, +) from .knowledge_base import KnowledgeBase from .visualization import ( show_knowledge, @@ -15,14 +20,21 @@ from .modifiable_property import ( bye_phrases = ['bye', 'exit'] +def gen_session_name(): + now = datetime.datetime.utcnow() + return "treeNLU-cli-session-{}.org".format( + now.strftime("%y_%m_%d %H:%M:%S_%f")) + def main(): + create_global_session(gen_session_name()) logging.getLogger().setLevel(logging.INFO) knowledge = gac_100.main() logging.getLogger().setLevel(logging.DEBUG) while True: try: data = input("> ").strip() + session().log_step(data, 0) except EOFError: print("bye") break diff --git a/naive-nlu/tree_nlu/knowledge_base.py b/naive-nlu/tree_nlu/knowledge_base.py index ead4b07..3749059 100644 --- a/naive-nlu/tree_nlu/knowledge_base.py +++ b/naive-nlu/tree_nlu/knowledge_base.py @@ -1,7 +1,8 @@ import copy - import logging +from .session.org_mode import global_session as session + from . import parsing from . import knowledge_evaluation from .modifiable_property import is_modifiable_property @@ -31,18 +32,18 @@ class KnowledgeBase(object): }) self.act_upon(result) - logging.debug("\x1b[7;32m> {} \x1b[0m".format(example)) + session().annotate("\x1b[7;32m> {} \x1b[0m".format(example)) tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) - logging.debug(tokens) + session().annotate(tokens) result = knowledge_evaluation.integrate_information(self.knowledge, { "elements": tokens, "decomposition": decomposition, "parsed": inferred_tree, }) - logging.debug("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result))) + session().annotate("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result))) self.act_upon(result) - logging.debug("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result))) + session().annotate("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result))) self.examples.append((decomposition, inferred_tree)) self.originals.append(example['text']) @@ -58,7 +59,7 @@ class KnowledgeBase(object): def process(self, row): row = row.lower() knowledge_before = copy.deepcopy(self.knowledge) - logging.debug("\x1b[7;32m> {} \x1b[0m".format(row)) + session().annotate("\x1b[7;32m> {} \x1b[0m".format(row)) tokens = parsing.to_tokens(row) fit = parsing.get_fit(self, tokens) if fit is None: diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index 2feb42c..e2704f9 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -1,4 +1,4 @@ -import logging +from .session.org_mode import global_session as session from .modifiable_property import ( ModifiableProperty, @@ -11,7 +11,7 @@ def resolve(knowledge_base, elements, value): if isinstance(value, int): return elements[value] elif isinstance(value, tuple) or isinstance(value, list): - logging.debug("V: {} {}".format(value, elements)) + session().annotate("V: {} {}".format(value, elements)) return integrate_information(knowledge_base, { "elements": elements, "parsed": value, @@ -103,16 +103,16 @@ def exists_property_with_value(knowledge_base, elements, subj, value): def modifiable_element_for_existance_in_set(container, set_name, element): - logging.debug("-----({} {} {})".format(container, set_name, element)) + session().annotate("-----({} {} {})".format(container, set_name, element)) def getter(): nonlocal container, set_name, element - logging.debug(" get({} {} {})".format(container, set_name, element)) + session().annotate(" get({} {} {})".format(container, set_name, element)) return (set_name in container) and (element in container[set_name]) def setter(): nonlocal container, set_name, element - logging.debug(" add({} {} {})".format(container, set_name, element)) + session().annotate(" add({} {} {})".format(container, set_name, element)) return container[set_name].add(element) return ModifiableProperty( @@ -219,7 +219,7 @@ def perform_verb_over_object(knowledge_base, elements, subj, verb, obj): subj = resolve(knowledge_base, elements, subj) verb = resolve(knowledge_base, elements, verb) obj = resolve(knowledge_base, elements, obj) - logging.debug("({} {} {})".format(verb, subj, obj)) + session().annotate("({} {} {})".format(verb, subj, obj)) if subj not in knowledge_base: knowledge_base[subj] = {'groups': set()} @@ -263,10 +263,10 @@ def integrate_information(knowledge_base, example): args = ast[1:] elements = example.get('elements', None) - logging.debug("Integrating:") - logging.debug("AST: {}".format(ast)) - logging.debug("ARG: {}".format(elements)) - logging.debug("------------") + session().annotate("Integrating:") + session().annotate("AST: {}".format(ast)) + session().annotate("ARG: {}".format(elements)) + session().annotate("------------") return tagged_with_ast( ast, elements, diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index 632a959..f8369a0 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -3,7 +3,7 @@ from . import knowledge_evaluation from . import depth_meter -import logging +from .session.org_mode import global_session as session import re import copy @@ -20,9 +20,9 @@ def to_tokens(text): def make_template(knowledge_base, tokens, parsed): matcher = list(tokens) template = list(parsed) - logging.debug(" -- MK TEMPLATE --") - logging.debug("MATCHR: {}".format(matcher)) - logging.debug("TEMPLT: {}".format(template)) + session().annotate(" -- MK TEMPLATE --") + session().annotate("MATCHR: {}".format(matcher)) + session().annotate("TEMPLT: {}".format(template)) for i in range(len(matcher)): word = matcher[i] if word in template: @@ -59,11 +59,11 @@ def get_lower_levels(parsed): # TODO: probably optimize this, it creates lots of unnecessary tuples def replace_position(tree, position, new_element): - logging.debug("REPLACE POSITIONS:") - logging.debug(" TREE : {}".format(tree)) - logging.debug("POSITION: {}".format(position)) - logging.debug("NEW ELEM: {}".format(new_element)) - logging.debug("------------------") + session().annotate("REPLACE POSITIONS:") + session().annotate(" TREE : {}".format(tree)) + session().annotate("POSITION: {}".format(position)) + session().annotate("NEW ELEM: {}".format(new_element)) + session().annotate("------------------") def aux(current_tree, remaining_route): if len(remaining_route) == 0: @@ -78,7 +78,7 @@ def replace_position(tree, position, new_element): ) result = aux(tree, position) - logging.debug("-RESULT: {}".format(result)) + session().annotate("-RESULT: {}".format(result)) return result @@ -90,37 +90,37 @@ def integrate_language(knowledge_base, example): tokens = to_tokens(text) while True: - logging.debug("P: {}".format(resolved_parsed)) + session().annotate("P: {}".format(resolved_parsed)) lower_levels = get_lower_levels(resolved_parsed) - logging.debug("Lower: {}".format(lower_levels)) + session().annotate("Lower: {}".format(lower_levels)) if len(lower_levels) == 0: break for position, atom in lower_levels: - logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom)) + session().annotate("\x1b[1mSelecting\x1b[0m: {}".format(atom)) similar = get_similar_tree(knowledge_base, atom, tokens) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) - logging.debug("--FIND MIX--") - logging.debug("-MIX- | {}".format(remix)) - logging.debug("-FRM- | {}".format(tokens)) - logging.debug("-AFT- | {}".format(after_remix)) + session().annotate("--FIND MIX--") + session().annotate("-MIX- | {}".format(remix)) + session().annotate("-FRM- | {}".format(tokens)) + session().annotate("-AFT- | {}".format(after_remix)) - logging.debug("--- TEMPLATE ---") + session().annotate("--- TEMPLATE ---") _, matcher, result = make_template(knowledge_base, after_remix, atom) - logging.debug("Tx: {}".format(after_remix)) - logging.debug("Mx: {}".format(matcher)) - logging.debug("Rx: {}".format(result)) - logging.debug("Sx: {}".format(start_bounds)) - logging.debug("Ex: {}".format(end_bounds)) + session().annotate("Tx: {}".format(after_remix)) + session().annotate("Mx: {}".format(matcher)) + session().annotate("Rx: {}".format(result)) + session().annotate("Sx: {}".format(start_bounds)) + session().annotate("Ex: {}".format(end_bounds)) assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) - logging.debug( " +-> {}".format(after_remix)) + session().annotate( " +-> {}".format(after_remix)) subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom) - logging.debug(r" \-> <{}>".format(subquery_type)) + session().annotate(r" \-> <{}>".format(subquery_type)) # Clean remaining tokens new_tokens = list(tokens) @@ -133,16 +133,16 @@ def integrate_language(knowledge_base, example): tokens = new_tokens resolved_parsed = replace_position(resolved_parsed, position, offset) - logging.debug("RP: {}".format(resolved_parsed)) - logging.debug("AT: {}".format(atom)) - logging.debug("#########") + session().annotate("RP: {}".format(resolved_parsed)) + session().annotate("AT: {}".format(atom)) + session().annotate("#########") tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed) - logging.debug("T: {}".format(tokens)) - logging.debug("M: {}".format(matcher)) - logging.debug("R: {}".format(result)) - logging.debug("---") + session().annotate("T: {}".format(tokens)) + session().annotate("M: {}".format(matcher)) + session().annotate("R: {}".format(result)) + session().annotate("---") return tokens, matcher, result @@ -180,8 +180,8 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher): matrix = [] for element in matcher: - logging.debug("- {}".format(element)) - logging.debug("+ {}".format(similar_matcher)) + session().annotate("- {}".format(element)) + session().annotate("+ {}".format(similar_matcher)) if element in similar_matcher or isinstance(element, dict): if isinstance(element, dict): indexes = all_matching_indexes(knowledge_base, similar_matcher, element) @@ -298,12 +298,12 @@ def get_similar_tree(knowledge_base, atom, tokens): return None for i, possibility in enumerate(sorted_possibilities): - logging.debug('---- POSSIBILITY #{} ----'.format(i)) + session().annotate('---- POSSIBILITY #{} ----'.format(i)) similar_matcher, similar_result, similar_result_resolved, _, _ = possibility - logging.debug('AST: {}'.format(similar_result)) - logging.debug('Based on: {}'.format(similar_matcher)) - logging.debug('Results on: {}'.format(similar_result_resolved)) - logging.debug('---------------------') + session().annotate('AST: {}'.format(similar_result)) + session().annotate('Based on: {}'.format(similar_matcher)) + session().annotate('Results on: {}'.format(similar_result_resolved)) + session().annotate('---------------------') return sorted_possibilities[0] @@ -382,9 +382,9 @@ def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS): if result is not None: results.append(result) - logging.debug("XXX {}".format(result)) + session().annotate("XXX {}".format(result)) - logging.debug(' - ' + '\n - '.join(map(str, results))) + session().annotate(' - ' + '\n - '.join(map(str, results))) if len(results) > 0: return results[0] @@ -419,7 +419,7 @@ def resolve_fit(knowledge, fit, remaining_recursions): return None minitokens, miniast = minifit - logging.debug(" AST | {}".format(miniast)) + session().annotate(" AST | {}".format(miniast)) subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast) fitted.append(subproperty) @@ -429,17 +429,17 @@ def resolve_fit(knowledge, fit, remaining_recursions): def match_fit(knowledge, tokens, matcher, ast, remaining_recursions): segment_possibilities = [([], tokens)] # Matched tokens, remaining tokens indent = ' ' * (parameters.MAX_RECURSIONS - remaining_recursions) - logging.debug(indent + 'T> {}'.format(tokens)) - logging.debug(indent + 'M> {}'.format(matcher)) + session().annotate(indent + 'T> {}'.format(tokens)) + session().annotate(indent + 'M> {}'.format(matcher)) for minisegment in matcher: possibilities_after_round = [] - logging.debug(indent + "MS {}".format(minisegment)) + session().annotate(indent + "MS {}".format(minisegment)) for matched_tokens, remaining_tokens in segment_possibilities: if len(remaining_tokens) < 1: continue - logging.debug(indent + "RT {}".format(remaining_tokens[0])) - logging.debug(indent + "DEF {}".format(is_definite_minisegment(minisegment))) + session().annotate(indent + "RT {}".format(remaining_tokens[0])) + session().annotate(indent + "DEF {}".format(is_definite_minisegment(minisegment))) if is_definite_minisegment(minisegment): # What if not match -----< if match_token(knowledge, remaining_tokens[0], minisegment): @@ -455,10 +455,10 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions): matched_tokens + [(minisegment, remaining_tokens[:i])], remaining_tokens[i:] )) - logging.debug(indent + "## PA {}".format(possibilities_after_round)) + session().annotate(indent + "## PA {}".format(possibilities_after_round)) else: segment_possibilities = possibilities_after_round - logging.debug(">>>> {}".format(len(segment_possibilities))) + session().annotate(">>>> {}".format(len(segment_possibilities))) fully_matched_segments = [(matched, remaining) for (matched, remaining) @@ -467,11 +467,11 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions): resolved_fits = [] for fit, _ in fully_matched_segments: - logging.debug(indent + "::: {}".format(fit)) # REMIXES HAVE TO BE APPLIED BEFORE!!! + session().annotate(indent + "::: {}".format(fit)) # REMIXES HAVE TO BE APPLIED BEFORE!!! - logging.debug(indent + '*' * 20) + session().annotate(indent + '*' * 20) for fit, _ in fully_matched_segments: - logging.debug(indent + "::: {}".format(fit)) # REMIXES HAVE TO BE APPLIED BEFORE!!! + session().annotate(indent + "::: {}".format(fit)) # REMIXES HAVE TO BE APPLIED BEFORE!!! resolved_fit = resolve_fit(knowledge, fit, remaining_recursions) if resolved_fit is not None: resolved_fits.append(resolved_fit) diff --git a/naive-nlu/tree_nlu/session/org_mode.py b/naive-nlu/tree_nlu/session/org_mode.py new file mode 100644 index 0000000..13ee3ed --- /dev/null +++ b/naive-nlu/tree_nlu/session/org_mode.py @@ -0,0 +1,45 @@ +import logging +import datetime + +SESSION = None + + +def __gen_session_name__(): + now = datetime.datetime.utcnow() + return "treeNLU-session-{}.org".format( + now.strftime("%y_%m_%d %H:%M:%S_%f")) + + +def create_global_session(fname): + global SESSION + SESSION = OrgModeSession(fname) + + +def global_session(): + if SESSION is None: + session_name = __gen_session_name__() + logging.warn("Session not created, saved on {}".format(session_name)) + create_global_session(session_name) + + assert(SESSION is not None) + return SESSION + + +class OrgModeSession: + def __init__(self, fname): + self.f = open(fname, 'wt') + self.last_level = 0 + + def annotate(self, annotation): + self.f.write("{indentation} {data}\n".format( + indentation=' ' * (self.last_level + 2 + 1), + data=annotation)) + + def log_step(self, string, level): + self.f.write("{indentation} {data}\n".format( + indentation='*' * (level + 1), + data=string)) + self.last_level = level + + def close(self): + self.f.close() diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index 3a67370..ee048e4 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -1,5 +1,7 @@ import traceback import logging +import datetime +from .session import org_mode from .tests import basic from .tests import gac_100 from .tests import gac_extension @@ -12,7 +14,15 @@ tests = ( ("gac+", gac_extension), ) + +def gen_session_name(): + now = datetime.datetime.utcnow() + return "treeNLU-test-session-{}.org".format( + now.strftime("%y_%m_%d %H:%M:%S_%f")) + + def main(): + org_mode.create_global_session(gen_session_name()) failed = False for test_name, test_module in tests: try: diff --git a/naive-nlu/tree_nlu/tests/basic.py b/naive-nlu/tree_nlu/tests/basic.py index 414a0ce..4fc7e48 100644 --- a/naive-nlu/tree_nlu/tests/basic.py +++ b/naive-nlu/tree_nlu/tests/basic.py @@ -1,4 +1,4 @@ -import logging +from ..session.org_mode import global_session as session import json from ..knowledge_base import KnowledgeBase @@ -110,13 +110,13 @@ base_knowledge = { } def test_assumption(expectedResponse, knowledge, query): - logging.debug("Query: {}".format(query['text'])) - logging.debug("Expected: {}".format(expectedResponse)) + session().annotate("Query: {}".format(query['text'])) + session().annotate("Expected: {}".format(expectedResponse)) result, abstract_tree, diff = knowledge.process(query['text']) end_result = result.getter() if is_modifiable_property(result) else result - logging.debug("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) + session().annotate("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) if end_result != expectedResponse: raise AssertionError('{} is not {}'.format(end_result, expectedResponse)) @@ -127,9 +127,9 @@ def main(): differences = knowledge.train(examples) - logging.debug("----") - logging.debug(differences()) - logging.debug("----") + session().annotate("----") + session().annotate(differences()) + session().annotate("----") test_assumption(True, knowledge, {'text': 'earth is a planet'}) test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) @@ -137,12 +137,12 @@ def main(): row = test['text'] result, inferred_tree, differences = knowledge.process(row) - logging.debug("result:", result) - logging.debug(differences()) - logging.debug("---") - logging.debug('-----') - logging.debug(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) - logging.debug('-----') + session().annotate("result: {}".format(result)) + session().annotate(differences()) + session().annotate("---") + session().annotate('-----') + session().annotate(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) + session().annotate('-----') queryTrue = { "text": "is io a moon?", diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 9907f8c..5e09abc 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -1,4 +1,4 @@ -import logging +from ..session.org_mode import global_session as session from ..knowledge_base import KnowledgeBase from ..utils.visuals import show_progbar from ..visualization import show_knowledge @@ -178,12 +178,12 @@ examples = [ # "parsed": (), # "answer": None, # }, - # { - # "text": "Is water a liquid?", - # "affirmation": "Is water a liquid?", - # "parsed": (), - # "answer": None, - # }, + # ('text_example', + # { + # "question": "is water a liquid?", + # "affirmation": "water is a liquid", + # "answer": True, + # }), # { # "text": "Is Bugs Bunny a cartoon character?", # "affirmation": "Is Bugs Bunny a cartoon character?", @@ -696,7 +696,7 @@ def main(): for i, (example_type, data) in enumerate(examples): if example_type == 'full_example': - logging.info(data['affirmation']) + session().log_step(data['affirmation'], 0) affirmation = { 'text': data['affirmation'], 'parsed': data['parsed'][1], @@ -708,7 +708,7 @@ def main(): show_progbar(i, total, data['text']) differences = knowledge.train([question]) - logging.debug(differences()) + session().annotate(differences()) result, _, _ = knowledge.process(data['text']) @@ -720,16 +720,16 @@ def main(): raise AssertionError('{} is not {}'.format(result, data['answer'])) elif example_type == 'text_example': - logging.info(data['affirmation']) + session().log_step(data['affirmation'], 0) show_progbar(i, total, data['affirmation']) affirmation = data['affirmation'] - logging.debug("Processing affirmation: {}".format(affirmation)) + session().annotate("Processing affirmation: {}".format(affirmation)) _, _, _ = knowledge.process(affirmation) show_progbar(i, total, data['question']) question = data['question'] - logging.debug("Processing question : {}".format(question)) + session().annotate("Processing question : {}".format(question)) result, _, _ = knowledge.process(question) if result != data['answer']: