From 851ab1de20c195c5e621ed509c94243898a72e38 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 17 May 2017 23:54:14 +0200 Subject: [PATCH] Blindly replace all prints by logging.debug. Also, add a "depth" meter to measure the depth level in the tree matching. --- naive-nlu/depth_meter.py | 5 + naive-nlu/knowledge_base.py | 13 +-- naive-nlu/knowledge_evaluation.py | 4 +- naive-nlu/parsing.py | 168 +++++++++++++++--------------- naive-nlu/test.py | 32 +++--- 5 files changed, 117 insertions(+), 105 deletions(-) create mode 100644 naive-nlu/depth_meter.py diff --git a/naive-nlu/depth_meter.py b/naive-nlu/depth_meter.py new file mode 100644 index 0000000..8bc3073 --- /dev/null +++ b/naive-nlu/depth_meter.py @@ -0,0 +1,5 @@ +import sys + +def show_depth(depth: int): + sys.stdout.write("\r\x1b[K" + (u'█' * int(depth / 2)) + (u'▌' * int(depth % 2)) + "\x1b[7m \x1b[0m\b") + sys.stdout.flush() diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index 3fedac2..08d51e3 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -1,5 +1,6 @@ import copy +import logging import parsing import knowledge_evaluation from modifiable_property import ModifiableProperty @@ -22,18 +23,18 @@ class KnowledgeBase(object): # Parse everything parsed_examples = [] for example in examples: - print("\x1b[7;32m> {} \x1b[0m".format(example)) + logging.debug("\x1b[7;32m> {} \x1b[0m".format(example)) tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) - print(tokens) + logging.debug(tokens) result = knowledge_evaluation.integrate_information(self.knowledge, { "elements": tokens, "decomposition": decomposition, "parsed": inferred_tree, }) - print("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result))) + logging.debug("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result))) self.act_upon(result) - print("\x1b[7;34m< {} \x1b[0m".format(self.get_value(result))) + logging.debug("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result))) self.examples.append((decomposition, inferred_tree)) # Reduce values @@ -48,7 +49,7 @@ class KnowledgeBase(object): def process(self, row): knowledge_before = copy.deepcopy(self.knowledge) - print("\x1b[7;32m> {} \x1b[0m".format(row)) + logging.debug("\x1b[7;32m> {} \x1b[0m".format(row)) tokens, inferred_tree = parsing.get_fit(self, row) result = knowledge_evaluation.integrate_information(self.knowledge, { @@ -73,4 +74,4 @@ class KnowledgeBase(object): if isinstance(result, ModifiableProperty): result.setter() else: - print(result) + logging.debug(result) diff --git a/naive-nlu/knowledge_evaluation.py b/naive-nlu/knowledge_evaluation.py index fb717f3..7784b05 100644 --- a/naive-nlu/knowledge_evaluation.py +++ b/naive-nlu/knowledge_evaluation.py @@ -37,8 +37,8 @@ def get_subquery_type(knowledge_base, atom): def property_for_value(knowledge_base, value): - # print(value) - # print(knowledge_base[value]) + # logging.debug(value) + # logging.debug(knowledge_base[value]) return knowledge_base[value]['as_property'] diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index 6c5ab44..353cabb 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -2,8 +2,11 @@ import knowledge_evaluation +import depth_meter +import logging import re import copy + from functools import reduce from typing import List from modifiable_property import ModifiableProperty @@ -78,28 +81,28 @@ def integrate_language(knowledge_base, example): tokens = to_tokens(text) while True: - print("P:", resolved_parsed) + logging.debug("P:", resolved_parsed) lower_levels = get_lower_levels(resolved_parsed) - print("Lower:", lower_levels) + logging.debug("Lower:", lower_levels) if len(lower_levels) == 0: break for position, atom in lower_levels: - print("\x1b[1mSelecting\x1b[0m:", atom) + logging.debug("\x1b[1mSelecting\x1b[0m:", atom) similar = get_similar_tree(knowledge_base, atom) - print("___>", similar) + logging.debug("___>", similar) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) _, matcher, result = make_template(knowledge_base, tokens, atom) - print("Tx:", tokens) - print("Mx:", matcher) - print("Rx:", result) - print("Remix:", remix) + logging.debug("Tx:", tokens) + logging.debug("Mx:", matcher) + logging.debug("Rx:", result) + logging.debug("Remix:", remix) after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) - print( " +->", after_remix) + logging.debug( " +->", after_remix) subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom) - print(r" \-> <{}>".format(subquery_type)) + logging.debug(r" \-> <{}>".format(subquery_type)) # Clean remaining tokens new_tokens = list(tokens) @@ -112,14 +115,14 @@ def integrate_language(knowledge_base, example): tokens = new_tokens resolved_parsed = replace_position(resolved_parsed, position, offset) - print("#########") + logging.debug("#########") tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed) - print("T:", tokens) - print("M:", matcher) - print("R:", result) - print() + logging.debug("T:", tokens) + logging.debug("M:", matcher) + logging.debug("R:", result) + logging.debug("---") return tokens, matcher, result @@ -131,26 +134,26 @@ def apply_remix(tokens, remix): def build_remix_matrix(knowledge_base, tokens, atom, similar): - # print("+" * 20) + # logging.debug("+" * 20) tokens = list(tokens) tokens, matcher, result = make_template(knowledge_base, tokens, atom) similar_matcher, similar_result, similar_result_resolved, _ = similar - # print("NEW:") - # print("Tokens:", tokens) - # print("Matcher:", matcher) - # print("Result:", result) - # print() - # print("Similar:") - # print("Matcher:", similar_matcher) - # print("Result:", similar_result) + # logging.debug("NEW:") + # logging.debug("Tokens:", tokens) + # logging.debug("Matcher:", matcher) + # logging.debug("Result:", result) + # logging.debug("---") + # logging.debug("Similar:") + # logging.debug("Matcher:", similar_matcher) + # logging.debug("Result:", similar_result) start_bounds, end_bounds = find_bounds(matcher, similar_matcher) - # print() - # print("Bounds:") - # print("Start:", start_bounds) - # print("End: ", end_bounds) + # logging.debug("---") + # logging.debug("Bounds:") + # logging.debug("Start:", start_bounds) + # logging.debug("End: ", end_bounds) for i, element in (end_bounds + start_bounds[::-1]): matcher.pop(i) @@ -159,20 +162,20 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar): possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher) chosen_remix = possible_remixes[0] - # print("New tokens:", tokens) - # print("-" * 20) + # logging.debug("New tokens:", tokens) + # logging.debug("-" * 20) return chosen_remix, (start_bounds, end_bounds) def get_possible_remixes(knowledge_base, matcher, similar_matcher): - # print("*" * 20) - # print(matcher) - # print(similar_matcher) + # logging.debug("*" * 20) + # logging.debug(matcher) + # logging.debug(similar_matcher) matrix = [] for element in matcher: - print("-", element) - print("+", similar_matcher) + logging.debug("-", element) + logging.debug("+", similar_matcher) assert(element in similar_matcher or isinstance(element, dict)) if isinstance(element, dict): @@ -181,8 +184,8 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher): indexes = all_indexes(similar_matcher, element) matrix.append(indexes) - # print(matrix) - # print([list(x) for x in list(zip(*matrix))]) + # logging.debug(matrix) + # logging.debug([list(x) for x in list(zip(*matrix))]) # TODO: do some scoring to find the most "interesting combination" return [list(x) for x in list(zip(*matrix))] @@ -302,19 +305,19 @@ def get_matching(sample, other): def reprocess_language_knowledge(knowledge_base, examples): examples = knowledge_base.examples + examples - print('\n'.join(map(str, knowledge_base.examples))) - print("--") + logging.debug('\n'.join(map(str, knowledge_base.examples))) + logging.debug("--") pattern_examples = [] for i, sample in enumerate(examples): other = examples[:i] + examples[i + 1:] match = get_matching(sample, other) - print("->", match) + logging.debug("->", match) if len(match) > 0: sample = (match, sample[1],) pattern_examples.append(sample) - print() - print("\x1b[7m--\x1b[0m") + logging.debug("---") + logging.debug("\x1b[7m--\x1b[0m") return pattern_examples @@ -331,12 +334,12 @@ def fitting_return_type(knowledge, parsed_output = [] remaining_input = reverse_remix(input_stream, remixer) - print(indent + "RMXin:", remaining_input) + logging.debug(indent + "RMXin:", remaining_input) remaining_output = copy.deepcopy(sample) - print(indent + "S:", sample) - print(indent + "A:", ast) - print() + logging.debug(indent + "S:", sample) + logging.debug(indent + "A:", ast) + logging.debug("---") while len(remaining_output) > 0: for (elements, @@ -346,18 +349,18 @@ def fitting_return_type(knowledge, remaining_output, remaining_recursions - 1): parsed_input += elements - print(indent + "Elements:", elements) + logging.debug(indent + "Elements:", elements) break - print(indent + "Pi:", parsed_input) - print(indent + "Po:", parsed_output) - print("\x1b[7m" + indent + "Ri:", + logging.debug(indent + "Pi:", parsed_input) + logging.debug(indent + "Po:", parsed_output) + logging.debug("\x1b[7m" + indent + "Ri:", remaining_input, "\x1b[0m") - print("\x1b[7m" + indent + "Ro:", + logging.debug("\x1b[7m" + indent + "Ro:", remaining_output + tail_of_ouput_stream, "\x1b[0m") - print() + logging.debug("---") resolved_input = knowledge_evaluation.resolve(knowledge.knowledge, parsed_input, ast) if isinstance(resolved_input, ModifiableProperty): @@ -365,16 +368,16 @@ def fitting_return_type(knowledge, yield ([resolved_input], (remaining_input, remaining_output + tail_of_ouput_stream)) except TypeError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) pass except ValueError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) pass except IndexError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) pass except KeyError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) pass raise TypeError("No matching type found") @@ -393,14 +396,15 @@ def match_token(knowledge, if remaining_recursions < 1: yield None - # print("#" * (MAX_RECURSIONS - remaining_recursions)) - # print("Input:", input) - # print("Output:", trained) + # logging.debug("#" * (MAX_RECURSIONS - remaining_recursions)) + # logging.debug("Input:", input) + # logging.debug("Output:", trained) + depth_meter.show_depth(MAX_RECURSIONS - remaining_recursions) indent = " " + " " * (MAX_RECURSIONS - remaining_recursions) first_input = input[0] expected_first = trained[0] - print(indent + "Ex?", expected_first) - print(indent + "Fo!", first_input) + logging.debug(indent + "Ex?", expected_first) + logging.debug(indent + "Fo!", first_input) if isinstance(expected_first, dict): # TODO: check if the dictionary matches the values @@ -412,7 +416,7 @@ def match_token(knowledge, return_type, remixer, input, trained[1:], remaining_recursions): - print("-->", r) + logging.debug("-->", r) yield r elif expected_first == first_input: @@ -432,32 +436,32 @@ def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining remaining_input, remaining_output, remaining_recursions): - print("Nli:", input_for_next_level) - print("Nlo:", output_for_next_level) - print(indent + "E", elements) + logging.debug("Nli:", input_for_next_level) + logging.debug("Nlo:", output_for_next_level) + logging.debug(indent + "E", elements) try: result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions) - print(indent + "→", result) + logging.debug(indent + "→", result) lower_elements, _ = result - print("<<<<< ELM:", elements, lower_elements) + logging.debug("<<<<< ELM:", elements, lower_elements) return elements + lower_elements, ast except TypeError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) except IndexError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) else: - print(indent + "Ri:", remaining_input) - print(indent + "Ro:", remaining_output) - print("OK") + logging.debug(indent + "Ri:", remaining_input) + logging.debug(indent + "Ro:", remaining_output) + logging.debug("OK") elif len(remaining_input) == 0 and len(remaining_input) == 0: - print("<<<<< AST:", ast) + logging.debug("<<<<< AST:", ast) return [], ast except TypeError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) except IndexError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) return None @@ -465,21 +469,21 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS): tokens = to_tokens(row) indent = " " * (MAX_RECURSIONS - remaining_recursions) for sample, ast in knowledge.trained: - print("-----") - print("TOK:", tokens) + logging.debug("-----") + logging.debug("TOK:", tokens) try: remaining_input = copy.deepcopy(tokens) remaining_output = copy.deepcopy(sample) - print(indent + "AST:", ast) - print(indent + "S:", sample) + logging.debug(indent + "AST:", ast) + logging.debug(indent + "S:", sample) result = get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions) if result is not None: return result except TypeError as e: - print(indent + "X " + str(e)) + logging.debug(indent + "X " + str(e)) except IndexError as e: - print(indent + "X " + str(e)) - print() + logging.debug(indent + "X " + str(e)) + logging.debug("---") else: return None diff --git a/naive-nlu/test.py b/naive-nlu/test.py index e27dd95..c213591 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -1,4 +1,5 @@ import json +import logging from knowledge_base import KnowledgeBase from modifiable_property import ModifiableProperty @@ -98,13 +99,13 @@ base_knowledge = { def test_assumption(expectedResponse, knowledge, query): - print("Query: {}".format(query['text'])) - print("Expected: {}".format(expectedResponse)) + logging.info("Query: {}".format(query['text'])) + logging.info("Expected: {}".format(expectedResponse)) result, abstract_tree, diff = knowledge.process(query['text']) end_result = result.getter() if isinstance(result, ModifiableProperty) else result - print("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) + logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) assert(end_result == expectedResponse) @@ -115,22 +116,23 @@ def main(): differences = knowledge.train(examples) - print("----") - print(differences()) - print("----") + logging.info("----") + logging.info(differences()) + logging.info("----") test_assumption(True, knowledge, {'text': 'earth is a planet'}) test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) - # for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: - # row = test['text'] - # result, inferred_tree, differences = knowledge.process(row) + for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: + row = test['text'] + result, inferred_tree, differences = knowledge.process(row) + + logging.info("result:", result) + logging.info(differences()) + logging.info("---") + logging.info('-----') + logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) + logging.info('-----') - # print("result:", result) - # print(differences()) - # print() - # print('-----') - # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) - # print('-----') queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon"))