Blindly replace all prints by logging.debug.

Also, add a "depth" meter to measure the depth level in the
tree matching.
This commit is contained in:
kenkeiras 2017-05-17 23:54:14 +02:00
parent b2b6be6c9a
commit 851ab1de20
5 changed files with 117 additions and 105 deletions

5
naive-nlu/depth_meter.py Normal file
View File

@ -0,0 +1,5 @@
import sys
def show_depth(depth: int) -> None:
    """Draw a one-line progress-bar-style depth gauge on stdout.

    Renders one full block (U+2588) per two levels of depth, a left half
    block (U+258C) for an odd remainder, and an inverse-video space as a
    cursor marker, overwriting the current terminal line in place.

    :param depth: current recursion/nesting depth; non-negative.
    """
    # "\r\x1b[K" returns the cursor to column 0 and clears the line so the
    # gauge redraws in place on every call.  The trailing "\x1b[7m \x1b[0m\b"
    # prints an inverse-video space and backs the cursor onto it.
    # NOTE(review): the block glyphs were stripped to '' in this rendering of
    # the commit; restored as U+2588 / U+258C based on the depth/2 full-step
    # and depth%2 half-step arithmetic — confirm against the original file.
    sys.stdout.write("\r\x1b[K"
                     + (u'\u2588' * int(depth / 2))
                     + (u'\u258c' * int(depth % 2))
                     + "\x1b[7m \x1b[0m\b")
    # Flush immediately: this is interactive feedback, not buffered output.
    sys.stdout.flush()

View File

@ -1,5 +1,6 @@
import copy
import logging
import parsing
import knowledge_evaluation
from modifiable_property import ModifiableProperty
@ -22,18 +23,18 @@ class KnowledgeBase(object):
# Parse everything
parsed_examples = []
for example in examples:
print("\x1b[7;32m> {} \x1b[0m".format(example))
logging.debug("\x1b[7;32m> {} \x1b[0m".format(example))
tokens, decomposition, inferred_tree = parsing.integrate_language(self, example)
print(tokens)
logging.debug(tokens)
result = knowledge_evaluation.integrate_information(self.knowledge, {
"elements": tokens,
"decomposition": decomposition,
"parsed": inferred_tree,
})
print("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result)))
logging.debug("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result)))
self.act_upon(result)
print("\x1b[7;34m< {} \x1b[0m".format(self.get_value(result)))
logging.debug("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result)))
self.examples.append((decomposition, inferred_tree))
# Reduce values
@ -48,7 +49,7 @@ class KnowledgeBase(object):
def process(self, row):
knowledge_before = copy.deepcopy(self.knowledge)
print("\x1b[7;32m> {} \x1b[0m".format(row))
logging.debug("\x1b[7;32m> {} \x1b[0m".format(row))
tokens, inferred_tree = parsing.get_fit(self, row)
result = knowledge_evaluation.integrate_information(self.knowledge,
{
@ -73,4 +74,4 @@ class KnowledgeBase(object):
if isinstance(result, ModifiableProperty):
result.setter()
else:
print(result)
logging.debug(result)

View File

@ -37,8 +37,8 @@ def get_subquery_type(knowledge_base, atom):
def property_for_value(knowledge_base, value):
# print(value)
# print(knowledge_base[value])
# logging.debug(value)
# logging.debug(knowledge_base[value])
return knowledge_base[value]['as_property']

View File

@ -2,8 +2,11 @@
import knowledge_evaluation
import depth_meter
import logging
import re
import copy
from functools import reduce
from typing import List
from modifiable_property import ModifiableProperty
@ -78,28 +81,28 @@ def integrate_language(knowledge_base, example):
tokens = to_tokens(text)
while True:
print("P:", resolved_parsed)
logging.debug("P:", resolved_parsed)
lower_levels = get_lower_levels(resolved_parsed)
print("Lower:", lower_levels)
logging.debug("Lower:", lower_levels)
if len(lower_levels) == 0:
break
for position, atom in lower_levels:
print("\x1b[1mSelecting\x1b[0m:", atom)
logging.debug("\x1b[1mSelecting\x1b[0m:", atom)
similar = get_similar_tree(knowledge_base, atom)
print("___>", similar)
logging.debug("___>", similar)
remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
_, matcher, result = make_template(knowledge_base, tokens, atom)
print("Tx:", tokens)
print("Mx:", matcher)
print("Rx:", result)
print("Remix:", remix)
logging.debug("Tx:", tokens)
logging.debug("Mx:", matcher)
logging.debug("Rx:", result)
logging.debug("Remix:", remix)
after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
print( " +->", after_remix)
logging.debug( " +->", after_remix)
subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
print(r" \-> <{}>".format(subquery_type))
logging.debug(r" \-> <{}>".format(subquery_type))
# Clean remaining tokens
new_tokens = list(tokens)
@ -112,14 +115,14 @@ def integrate_language(knowledge_base, example):
tokens = new_tokens
resolved_parsed = replace_position(resolved_parsed, position, offset)
print("#########")
logging.debug("#########")
tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
print("T:", tokens)
print("M:", matcher)
print("R:", result)
print()
logging.debug("T:", tokens)
logging.debug("M:", matcher)
logging.debug("R:", result)
logging.debug("---")
return tokens, matcher, result
@ -131,26 +134,26 @@ def apply_remix(tokens, remix):
def build_remix_matrix(knowledge_base, tokens, atom, similar):
# print("+" * 20)
# logging.debug("+" * 20)
tokens = list(tokens)
tokens, matcher, result = make_template(knowledge_base, tokens, atom)
similar_matcher, similar_result, similar_result_resolved, _ = similar
# print("NEW:")
# print("Tokens:", tokens)
# print("Matcher:", matcher)
# print("Result:", result)
# print()
# print("Similar:")
# print("Matcher:", similar_matcher)
# print("Result:", similar_result)
# logging.debug("NEW:")
# logging.debug("Tokens:", tokens)
# logging.debug("Matcher:", matcher)
# logging.debug("Result:", result)
# logging.debug("---")
# logging.debug("Similar:")
# logging.debug("Matcher:", similar_matcher)
# logging.debug("Result:", similar_result)
start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
# print()
# print("Bounds:")
# print("Start:", start_bounds)
# print("End: ", end_bounds)
# logging.debug("---")
# logging.debug("Bounds:")
# logging.debug("Start:", start_bounds)
# logging.debug("End: ", end_bounds)
for i, element in (end_bounds + start_bounds[::-1]):
matcher.pop(i)
@ -159,20 +162,20 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
chosen_remix = possible_remixes[0]
# print("New tokens:", tokens)
# print("-" * 20)
# logging.debug("New tokens:", tokens)
# logging.debug("-" * 20)
return chosen_remix, (start_bounds, end_bounds)
def get_possible_remixes(knowledge_base, matcher, similar_matcher):
# print("*" * 20)
# print(matcher)
# print(similar_matcher)
# logging.debug("*" * 20)
# logging.debug(matcher)
# logging.debug(similar_matcher)
matrix = []
for element in matcher:
print("-", element)
print("+", similar_matcher)
logging.debug("-", element)
logging.debug("+", similar_matcher)
assert(element in similar_matcher or isinstance(element, dict))
if isinstance(element, dict):
@ -181,8 +184,8 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher):
indexes = all_indexes(similar_matcher, element)
matrix.append(indexes)
# print(matrix)
# print([list(x) for x in list(zip(*matrix))])
# logging.debug(matrix)
# logging.debug([list(x) for x in list(zip(*matrix))])
# TODO: do some scoring to find the most "interesting combination"
return [list(x) for x in list(zip(*matrix))]
@ -302,19 +305,19 @@ def get_matching(sample, other):
def reprocess_language_knowledge(knowledge_base, examples):
examples = knowledge_base.examples + examples
print('\n'.join(map(str, knowledge_base.examples)))
print("--")
logging.debug('\n'.join(map(str, knowledge_base.examples)))
logging.debug("--")
pattern_examples = []
for i, sample in enumerate(examples):
other = examples[:i] + examples[i + 1:]
match = get_matching(sample, other)
print("->", match)
logging.debug("->", match)
if len(match) > 0:
sample = (match, sample[1],)
pattern_examples.append(sample)
print()
print("\x1b[7m--\x1b[0m")
logging.debug("---")
logging.debug("\x1b[7m--\x1b[0m")
return pattern_examples
@ -331,12 +334,12 @@ def fitting_return_type(knowledge,
parsed_output = []
remaining_input = reverse_remix(input_stream, remixer)
print(indent + "RMXin:", remaining_input)
logging.debug(indent + "RMXin:", remaining_input)
remaining_output = copy.deepcopy(sample)
print(indent + "S:", sample)
print(indent + "A:", ast)
print()
logging.debug(indent + "S:", sample)
logging.debug(indent + "A:", ast)
logging.debug("---")
while len(remaining_output) > 0:
for (elements,
@ -346,18 +349,18 @@ def fitting_return_type(knowledge,
remaining_output,
remaining_recursions - 1):
parsed_input += elements
print(indent + "Elements:", elements)
logging.debug(indent + "Elements:", elements)
break
print(indent + "Pi:", parsed_input)
print(indent + "Po:", parsed_output)
print("\x1b[7m" + indent + "Ri:",
logging.debug(indent + "Pi:", parsed_input)
logging.debug(indent + "Po:", parsed_output)
logging.debug("\x1b[7m" + indent + "Ri:",
remaining_input,
"\x1b[0m")
print("\x1b[7m" + indent + "Ro:",
logging.debug("\x1b[7m" + indent + "Ro:",
remaining_output + tail_of_ouput_stream,
"\x1b[0m")
print()
logging.debug("---")
resolved_input = knowledge_evaluation.resolve(knowledge.knowledge,
parsed_input, ast)
if isinstance(resolved_input, ModifiableProperty):
@ -365,16 +368,16 @@ def fitting_return_type(knowledge,
yield ([resolved_input],
(remaining_input, remaining_output + tail_of_ouput_stream))
except TypeError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
pass
except ValueError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
pass
except IndexError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
pass
except KeyError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
pass
raise TypeError("No matching type found")
@ -393,14 +396,15 @@ def match_token(knowledge,
if remaining_recursions < 1:
yield None
# print("#" * (MAX_RECURSIONS - remaining_recursions))
# print("Input:", input)
# print("Output:", trained)
# logging.debug("#" * (MAX_RECURSIONS - remaining_recursions))
# logging.debug("Input:", input)
# logging.debug("Output:", trained)
depth_meter.show_depth(MAX_RECURSIONS - remaining_recursions)
indent = " " + " " * (MAX_RECURSIONS - remaining_recursions)
first_input = input[0]
expected_first = trained[0]
print(indent + "Ex?", expected_first)
print(indent + "Fo!", first_input)
logging.debug(indent + "Ex?", expected_first)
logging.debug(indent + "Fo!", first_input)
if isinstance(expected_first, dict):
# TODO: check if the dictionary matches the values
@ -412,7 +416,7 @@ def match_token(knowledge,
return_type, remixer,
input, trained[1:],
remaining_recursions):
print("-->", r)
logging.debug("-->", r)
yield r
elif expected_first == first_input:
@ -432,32 +436,32 @@ def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining
remaining_input,
remaining_output,
remaining_recursions):
print("Nli:", input_for_next_level)
print("Nlo:", output_for_next_level)
print(indent + "E", elements)
logging.debug("Nli:", input_for_next_level)
logging.debug("Nlo:", output_for_next_level)
logging.debug(indent + "E", elements)
try:
result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
print(indent + "", result)
logging.debug(indent + "", result)
lower_elements, _ = result
print("<<<<< ELM:", elements, lower_elements)
logging.debug("<<<<< ELM:", elements, lower_elements)
return elements + lower_elements, ast
except TypeError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
except IndexError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
else:
print(indent + "Ri:", remaining_input)
print(indent + "Ro:", remaining_output)
print("OK")
logging.debug(indent + "Ri:", remaining_input)
logging.debug(indent + "Ro:", remaining_output)
logging.debug("OK")
elif len(remaining_input) == 0 and len(remaining_input) == 0:
print("<<<<< AST:", ast)
logging.debug("<<<<< AST:", ast)
return [], ast
except TypeError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
except IndexError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
return None
@ -465,21 +469,21 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS):
tokens = to_tokens(row)
indent = " " * (MAX_RECURSIONS - remaining_recursions)
for sample, ast in knowledge.trained:
print("-----")
print("TOK:", tokens)
logging.debug("-----")
logging.debug("TOK:", tokens)
try:
remaining_input = copy.deepcopy(tokens)
remaining_output = copy.deepcopy(sample)
print(indent + "AST:", ast)
print(indent + "S:", sample)
logging.debug(indent + "AST:", ast)
logging.debug(indent + "S:", sample)
result = get_fit_onwards(knowledge, ast, remaining_input,
remaining_output, remaining_recursions)
if result is not None:
return result
except TypeError as e:
print(indent + "X " + str(e))
logging.debug(indent + "X " + str(e))
except IndexError as e:
print(indent + "X " + str(e))
print()
logging.debug(indent + "X " + str(e))
logging.debug("---")
else:
return None

View File

@ -1,4 +1,5 @@
import json
import logging
from knowledge_base import KnowledgeBase
from modifiable_property import ModifiableProperty
@ -98,13 +99,13 @@ base_knowledge = {
def test_assumption(expectedResponse, knowledge, query):
print("Query: {}".format(query['text']))
print("Expected: {}".format(expectedResponse))
logging.info("Query: {}".format(query['text']))
logging.info("Expected: {}".format(expectedResponse))
result, abstract_tree, diff = knowledge.process(query['text'])
end_result = result.getter() if isinstance(result, ModifiableProperty) else result
print("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))
logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))
assert(end_result == expectedResponse)
@ -115,22 +116,23 @@ def main():
differences = knowledge.train(examples)
print("----")
print(differences())
print("----")
logging.info("----")
logging.info(differences())
logging.info("----")
test_assumption(True, knowledge, {'text': 'earth is a planet'})
test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
# for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
# row = test['text']
# result, inferred_tree, differences = knowledge.process(row)
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
row = test['text']
result, inferred_tree, differences = knowledge.process(row)
logging.info("result:", result)
logging.info(differences())
logging.info("---")
logging.info('-----')
logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
logging.info('-----')
# print("result:", result)
# print(differences())
# print()
# print('-----')
# print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
# print('-----')
queryTrue = {
"text": "is io a moon?",
"parsed": ("question", ("pertenence-to-group", "io", "moon"))