Blindly replace all print calls with logging.debug.

Also, add a "depth" meter that shows the current recursion depth
during tree matching.
This commit is contained in:
kenkeiras 2017-05-17 23:54:14 +02:00
parent b2b6be6c9a
commit 851ab1de20
5 changed files with 117 additions and 105 deletions

5
naive-nlu/depth_meter.py Normal file
View File

@ -0,0 +1,5 @@
import sys
def show_depth(depth: int) -> None:
    """Render a one-line depth meter on the current terminal line.

    Overwrites the line (carriage return + ANSI erase-to-end-of-line),
    then draws ``depth // 2`` full blocks plus one half block when
    *depth* is odd, followed by an inverse-video cursor cell, and
    flushes so the meter updates immediately.

    :param depth: current recursion depth; must be >= 0.
    """
    # NOTE(review): the bar glyphs were lost in extraction (the source
    # view shows empty u'' literals, which make the multiplications
    # no-ops); '█' (full) and '▌' (half) restored here — confirm
    # against the original file.
    sys.stdout.write(
        "\r\x1b[K"                      # return to column 0, erase line
        + ("█" * (depth // 2))          # one full block per two levels
        + ("▌" * (depth % 2))           # half block for an odd level
        + "\x1b[7m \x1b[0m\b"           # inverse-video "cursor" cell
    )
    sys.stdout.flush()

View File

@ -1,5 +1,6 @@
import copy import copy
import logging
import parsing import parsing
import knowledge_evaluation import knowledge_evaluation
from modifiable_property import ModifiableProperty from modifiable_property import ModifiableProperty
@ -22,18 +23,18 @@ class KnowledgeBase(object):
# Parse everything # Parse everything
parsed_examples = [] parsed_examples = []
for example in examples: for example in examples:
print("\x1b[7;32m> {} \x1b[0m".format(example)) logging.debug("\x1b[7;32m> {} \x1b[0m".format(example))
tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) tokens, decomposition, inferred_tree = parsing.integrate_language(self, example)
print(tokens) logging.debug(tokens)
result = knowledge_evaluation.integrate_information(self.knowledge, { result = knowledge_evaluation.integrate_information(self.knowledge, {
"elements": tokens, "elements": tokens,
"decomposition": decomposition, "decomposition": decomposition,
"parsed": inferred_tree, "parsed": inferred_tree,
}) })
print("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result))) logging.debug("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result)))
self.act_upon(result) self.act_upon(result)
print("\x1b[7;34m< {} \x1b[0m".format(self.get_value(result))) logging.debug("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result)))
self.examples.append((decomposition, inferred_tree)) self.examples.append((decomposition, inferred_tree))
# Reduce values # Reduce values
@ -48,7 +49,7 @@ class KnowledgeBase(object):
def process(self, row): def process(self, row):
knowledge_before = copy.deepcopy(self.knowledge) knowledge_before = copy.deepcopy(self.knowledge)
print("\x1b[7;32m> {} \x1b[0m".format(row)) logging.debug("\x1b[7;32m> {} \x1b[0m".format(row))
tokens, inferred_tree = parsing.get_fit(self, row) tokens, inferred_tree = parsing.get_fit(self, row)
result = knowledge_evaluation.integrate_information(self.knowledge, result = knowledge_evaluation.integrate_information(self.knowledge,
{ {
@ -73,4 +74,4 @@ class KnowledgeBase(object):
if isinstance(result, ModifiableProperty): if isinstance(result, ModifiableProperty):
result.setter() result.setter()
else: else:
print(result) logging.debug(result)

View File

@ -37,8 +37,8 @@ def get_subquery_type(knowledge_base, atom):
def property_for_value(knowledge_base, value): def property_for_value(knowledge_base, value):
# print(value) # logging.debug(value)
# print(knowledge_base[value]) # logging.debug(knowledge_base[value])
return knowledge_base[value]['as_property'] return knowledge_base[value]['as_property']

View File

@ -2,8 +2,11 @@
import knowledge_evaluation import knowledge_evaluation
import depth_meter
import logging
import re import re
import copy import copy
from functools import reduce from functools import reduce
from typing import List from typing import List
from modifiable_property import ModifiableProperty from modifiable_property import ModifiableProperty
@ -78,28 +81,28 @@ def integrate_language(knowledge_base, example):
tokens = to_tokens(text) tokens = to_tokens(text)
while True: while True:
print("P:", resolved_parsed) logging.debug("P:", resolved_parsed)
lower_levels = get_lower_levels(resolved_parsed) lower_levels = get_lower_levels(resolved_parsed)
print("Lower:", lower_levels) logging.debug("Lower:", lower_levels)
if len(lower_levels) == 0: if len(lower_levels) == 0:
break break
for position, atom in lower_levels: for position, atom in lower_levels:
print("\x1b[1mSelecting\x1b[0m:", atom) logging.debug("\x1b[1mSelecting\x1b[0m:", atom)
similar = get_similar_tree(knowledge_base, atom) similar = get_similar_tree(knowledge_base, atom)
print("___>", similar) logging.debug("___>", similar)
remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
_, matcher, result = make_template(knowledge_base, tokens, atom) _, matcher, result = make_template(knowledge_base, tokens, atom)
print("Tx:", tokens) logging.debug("Tx:", tokens)
print("Mx:", matcher) logging.debug("Mx:", matcher)
print("Rx:", result) logging.debug("Rx:", result)
print("Remix:", remix) logging.debug("Remix:", remix)
after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
print( " +->", after_remix) logging.debug( " +->", after_remix)
subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom) subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
print(r" \-> <{}>".format(subquery_type)) logging.debug(r" \-> <{}>".format(subquery_type))
# Clean remaining tokens # Clean remaining tokens
new_tokens = list(tokens) new_tokens = list(tokens)
@ -112,14 +115,14 @@ def integrate_language(knowledge_base, example):
tokens = new_tokens tokens = new_tokens
resolved_parsed = replace_position(resolved_parsed, position, offset) resolved_parsed = replace_position(resolved_parsed, position, offset)
print("#########") logging.debug("#########")
tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed) tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
print("T:", tokens) logging.debug("T:", tokens)
print("M:", matcher) logging.debug("M:", matcher)
print("R:", result) logging.debug("R:", result)
print() logging.debug("---")
return tokens, matcher, result return tokens, matcher, result
@ -131,26 +134,26 @@ def apply_remix(tokens, remix):
def build_remix_matrix(knowledge_base, tokens, atom, similar): def build_remix_matrix(knowledge_base, tokens, atom, similar):
# print("+" * 20) # logging.debug("+" * 20)
tokens = list(tokens) tokens = list(tokens)
tokens, matcher, result = make_template(knowledge_base, tokens, atom) tokens, matcher, result = make_template(knowledge_base, tokens, atom)
similar_matcher, similar_result, similar_result_resolved, _ = similar similar_matcher, similar_result, similar_result_resolved, _ = similar
# print("NEW:") # logging.debug("NEW:")
# print("Tokens:", tokens) # logging.debug("Tokens:", tokens)
# print("Matcher:", matcher) # logging.debug("Matcher:", matcher)
# print("Result:", result) # logging.debug("Result:", result)
# print() # logging.debug("---")
# print("Similar:") # logging.debug("Similar:")
# print("Matcher:", similar_matcher) # logging.debug("Matcher:", similar_matcher)
# print("Result:", similar_result) # logging.debug("Result:", similar_result)
start_bounds, end_bounds = find_bounds(matcher, similar_matcher) start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
# print() # logging.debug("---")
# print("Bounds:") # logging.debug("Bounds:")
# print("Start:", start_bounds) # logging.debug("Start:", start_bounds)
# print("End: ", end_bounds) # logging.debug("End: ", end_bounds)
for i, element in (end_bounds + start_bounds[::-1]): for i, element in (end_bounds + start_bounds[::-1]):
matcher.pop(i) matcher.pop(i)
@ -159,20 +162,20 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher) possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
chosen_remix = possible_remixes[0] chosen_remix = possible_remixes[0]
# print("New tokens:", tokens) # logging.debug("New tokens:", tokens)
# print("-" * 20) # logging.debug("-" * 20)
return chosen_remix, (start_bounds, end_bounds) return chosen_remix, (start_bounds, end_bounds)
def get_possible_remixes(knowledge_base, matcher, similar_matcher): def get_possible_remixes(knowledge_base, matcher, similar_matcher):
# print("*" * 20) # logging.debug("*" * 20)
# print(matcher) # logging.debug(matcher)
# print(similar_matcher) # logging.debug(similar_matcher)
matrix = [] matrix = []
for element in matcher: for element in matcher:
print("-", element) logging.debug("-", element)
print("+", similar_matcher) logging.debug("+", similar_matcher)
assert(element in similar_matcher or isinstance(element, dict)) assert(element in similar_matcher or isinstance(element, dict))
if isinstance(element, dict): if isinstance(element, dict):
@ -181,8 +184,8 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher):
indexes = all_indexes(similar_matcher, element) indexes = all_indexes(similar_matcher, element)
matrix.append(indexes) matrix.append(indexes)
# print(matrix) # logging.debug(matrix)
# print([list(x) for x in list(zip(*matrix))]) # logging.debug([list(x) for x in list(zip(*matrix))])
# TODO: do some scoring to find the most "interesting combination" # TODO: do some scoring to find the most "interesting combination"
return [list(x) for x in list(zip(*matrix))] return [list(x) for x in list(zip(*matrix))]
@ -302,19 +305,19 @@ def get_matching(sample, other):
def reprocess_language_knowledge(knowledge_base, examples): def reprocess_language_knowledge(knowledge_base, examples):
examples = knowledge_base.examples + examples examples = knowledge_base.examples + examples
print('\n'.join(map(str, knowledge_base.examples))) logging.debug('\n'.join(map(str, knowledge_base.examples)))
print("--") logging.debug("--")
pattern_examples = [] pattern_examples = []
for i, sample in enumerate(examples): for i, sample in enumerate(examples):
other = examples[:i] + examples[i + 1:] other = examples[:i] + examples[i + 1:]
match = get_matching(sample, other) match = get_matching(sample, other)
print("->", match) logging.debug("->", match)
if len(match) > 0: if len(match) > 0:
sample = (match, sample[1],) sample = (match, sample[1],)
pattern_examples.append(sample) pattern_examples.append(sample)
print() logging.debug("---")
print("\x1b[7m--\x1b[0m") logging.debug("\x1b[7m--\x1b[0m")
return pattern_examples return pattern_examples
@ -331,12 +334,12 @@ def fitting_return_type(knowledge,
parsed_output = [] parsed_output = []
remaining_input = reverse_remix(input_stream, remixer) remaining_input = reverse_remix(input_stream, remixer)
print(indent + "RMXin:", remaining_input) logging.debug(indent + "RMXin:", remaining_input)
remaining_output = copy.deepcopy(sample) remaining_output = copy.deepcopy(sample)
print(indent + "S:", sample) logging.debug(indent + "S:", sample)
print(indent + "A:", ast) logging.debug(indent + "A:", ast)
print() logging.debug("---")
while len(remaining_output) > 0: while len(remaining_output) > 0:
for (elements, for (elements,
@ -346,18 +349,18 @@ def fitting_return_type(knowledge,
remaining_output, remaining_output,
remaining_recursions - 1): remaining_recursions - 1):
parsed_input += elements parsed_input += elements
print(indent + "Elements:", elements) logging.debug(indent + "Elements:", elements)
break break
print(indent + "Pi:", parsed_input) logging.debug(indent + "Pi:", parsed_input)
print(indent + "Po:", parsed_output) logging.debug(indent + "Po:", parsed_output)
print("\x1b[7m" + indent + "Ri:", logging.debug("\x1b[7m" + indent + "Ri:",
remaining_input, remaining_input,
"\x1b[0m") "\x1b[0m")
print("\x1b[7m" + indent + "Ro:", logging.debug("\x1b[7m" + indent + "Ro:",
remaining_output + tail_of_ouput_stream, remaining_output + tail_of_ouput_stream,
"\x1b[0m") "\x1b[0m")
print() logging.debug("---")
resolved_input = knowledge_evaluation.resolve(knowledge.knowledge, resolved_input = knowledge_evaluation.resolve(knowledge.knowledge,
parsed_input, ast) parsed_input, ast)
if isinstance(resolved_input, ModifiableProperty): if isinstance(resolved_input, ModifiableProperty):
@ -365,16 +368,16 @@ def fitting_return_type(knowledge,
yield ([resolved_input], yield ([resolved_input],
(remaining_input, remaining_output + tail_of_ouput_stream)) (remaining_input, remaining_output + tail_of_ouput_stream))
except TypeError as e: except TypeError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
pass pass
except ValueError as e: except ValueError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
pass pass
except IndexError as e: except IndexError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
pass pass
except KeyError as e: except KeyError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
pass pass
raise TypeError("No matching type found") raise TypeError("No matching type found")
@ -393,14 +396,15 @@ def match_token(knowledge,
if remaining_recursions < 1: if remaining_recursions < 1:
yield None yield None
# print("#" * (MAX_RECURSIONS - remaining_recursions)) # logging.debug("#" * (MAX_RECURSIONS - remaining_recursions))
# print("Input:", input) # logging.debug("Input:", input)
# print("Output:", trained) # logging.debug("Output:", trained)
depth_meter.show_depth(MAX_RECURSIONS - remaining_recursions)
indent = " " + " " * (MAX_RECURSIONS - remaining_recursions) indent = " " + " " * (MAX_RECURSIONS - remaining_recursions)
first_input = input[0] first_input = input[0]
expected_first = trained[0] expected_first = trained[0]
print(indent + "Ex?", expected_first) logging.debug(indent + "Ex?", expected_first)
print(indent + "Fo!", first_input) logging.debug(indent + "Fo!", first_input)
if isinstance(expected_first, dict): if isinstance(expected_first, dict):
# TODO: check if the dictionary matches the values # TODO: check if the dictionary matches the values
@ -412,7 +416,7 @@ def match_token(knowledge,
return_type, remixer, return_type, remixer,
input, trained[1:], input, trained[1:],
remaining_recursions): remaining_recursions):
print("-->", r) logging.debug("-->", r)
yield r yield r
elif expected_first == first_input: elif expected_first == first_input:
@ -432,32 +436,32 @@ def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining
remaining_input, remaining_input,
remaining_output, remaining_output,
remaining_recursions): remaining_recursions):
print("Nli:", input_for_next_level) logging.debug("Nli:", input_for_next_level)
print("Nlo:", output_for_next_level) logging.debug("Nlo:", output_for_next_level)
print(indent + "E", elements) logging.debug(indent + "E", elements)
try: try:
result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions) result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
print(indent + "", result) logging.debug(indent + "", result)
lower_elements, _ = result lower_elements, _ = result
print("<<<<< ELM:", elements, lower_elements) logging.debug("<<<<< ELM:", elements, lower_elements)
return elements + lower_elements, ast return elements + lower_elements, ast
except TypeError as e: except TypeError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
except IndexError as e: except IndexError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
else: else:
print(indent + "Ri:", remaining_input) logging.debug(indent + "Ri:", remaining_input)
print(indent + "Ro:", remaining_output) logging.debug(indent + "Ro:", remaining_output)
print("OK") logging.debug("OK")
elif len(remaining_input) == 0 and len(remaining_input) == 0: elif len(remaining_input) == 0 and len(remaining_input) == 0:
print("<<<<< AST:", ast) logging.debug("<<<<< AST:", ast)
return [], ast return [], ast
except TypeError as e: except TypeError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
except IndexError as e: except IndexError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
return None return None
@ -465,21 +469,21 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS):
tokens = to_tokens(row) tokens = to_tokens(row)
indent = " " * (MAX_RECURSIONS - remaining_recursions) indent = " " * (MAX_RECURSIONS - remaining_recursions)
for sample, ast in knowledge.trained: for sample, ast in knowledge.trained:
print("-----") logging.debug("-----")
print("TOK:", tokens) logging.debug("TOK:", tokens)
try: try:
remaining_input = copy.deepcopy(tokens) remaining_input = copy.deepcopy(tokens)
remaining_output = copy.deepcopy(sample) remaining_output = copy.deepcopy(sample)
print(indent + "AST:", ast) logging.debug(indent + "AST:", ast)
print(indent + "S:", sample) logging.debug(indent + "S:", sample)
result = get_fit_onwards(knowledge, ast, remaining_input, result = get_fit_onwards(knowledge, ast, remaining_input,
remaining_output, remaining_recursions) remaining_output, remaining_recursions)
if result is not None: if result is not None:
return result return result
except TypeError as e: except TypeError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
except IndexError as e: except IndexError as e:
print(indent + "X " + str(e)) logging.debug(indent + "X " + str(e))
print() logging.debug("---")
else: else:
return None return None

View File

@ -1,4 +1,5 @@
import json import json
import logging
from knowledge_base import KnowledgeBase from knowledge_base import KnowledgeBase
from modifiable_property import ModifiableProperty from modifiable_property import ModifiableProperty
@ -98,13 +99,13 @@ base_knowledge = {
def test_assumption(expectedResponse, knowledge, query): def test_assumption(expectedResponse, knowledge, query):
print("Query: {}".format(query['text'])) logging.info("Query: {}".format(query['text']))
print("Expected: {}".format(expectedResponse)) logging.info("Expected: {}".format(expectedResponse))
result, abstract_tree, diff = knowledge.process(query['text']) result, abstract_tree, diff = knowledge.process(query['text'])
end_result = result.getter() if isinstance(result, ModifiableProperty) else result end_result = result.getter() if isinstance(result, ModifiableProperty) else result
print("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))
assert(end_result == expectedResponse) assert(end_result == expectedResponse)
@ -115,22 +116,23 @@ def main():
differences = knowledge.train(examples) differences = knowledge.train(examples)
print("----") logging.info("----")
print(differences()) logging.info(differences())
print("----") logging.info("----")
test_assumption(True, knowledge, {'text': 'earth is a planet'}) test_assumption(True, knowledge, {'text': 'earth is a planet'})
test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
# for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
# row = test['text'] row = test['text']
# result, inferred_tree, differences = knowledge.process(row) result, inferred_tree, differences = knowledge.process(row)
logging.info("result:", result)
logging.info(differences())
logging.info("---")
logging.info('-----')
logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
logging.info('-----')
# print("result:", result)
# print(differences())
# print()
# print('-----')
# print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
# print('-----')
queryTrue = { queryTrue = {
"text": "is io a moon?", "text": "is io a moon?",
"parsed": ("question", ("pertenence-to-group", "io", "moon")) "parsed": ("question", ("pertenence-to-group", "io", "moon"))