Fix exploration of multiple options on a sub-level.

kenkeiras 2017-05-17 00:27:23 +02:00
parent ce7b50e023
commit 63227c4f3e
3 changed files with 144 additions and 67 deletions
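
The core of the change is that the parser's match_token and fitting_return_type now behave as generators (yield) instead of returning a single result, so the new get_fit_onwards can try every candidate match at a sub-level and fall back when one of them dead-ends. A minimal, self-contained sketch of that backtracking pattern; the names and data below are illustrative, not the project's API:

# Illustrative sketch of generator-driven backtracking (not project code):
# each matcher yields every compatible option instead of returning only the
# first one, so a dead end at a lower level no longer aborts the whole parse.

def candidate_matches(token, options):
    # Hypothetical matcher: yield every option that could stand for `token`.
    for option in options:
        if option.startswith(token):
            yield option


def explore(tokens, options, used=frozenset()):
    # Assign each token to a distinct option, backtracking through candidates.
    if not tokens:
        return []
    first, rest = tokens[0], tokens[1:]
    for candidate in candidate_matches(first, options):
        if candidate in used:
            continue
        tail = explore(rest, options, used | {candidate})
        if tail is not None:      # this sub-level worked out
            return [candidate] + tail
    return None                   # every option at this level failed


if __name__ == '__main__':
    # "mo" first grabs "molecule", which leaves nothing for "mol";
    # the generator lets us back up and try "moon" instead.
    print(explore(["mo", "mol"], ["molecule", "moon"]))   # ['moon', 'molecule']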

View File

@@ -49,11 +49,10 @@ class KnowledgeBase(object):
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
         print("\x1b[7;32m> {} \x1b[0m".format(row))
-        tokens, decomposition, inferred_tree = parsing.get_fit(self, row)
+        tokens, inferred_tree = parsing.get_fit(self, row)
         result = knowledge_evaluation.integrate_information(self.knowledge,
                                                             {
                                                                 "elements": tokens,
-                                                                "decomposition": decomposition,
                                                                 "parsed": inferred_tree,
                                                             })
         self.act_upon(result)
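
With "decomposition" gone, process unpacks two values from parsing.get_fit and passes only "elements" and "parsed" downstream. A hedged sketch of the new call contract; the None guard is an assumption (get_fit can return None when no training sample fits), not something this commit adds:

# Sketch of the two-value contract; the None guard is an assumption.
import parsing                # project module providing get_fit
import knowledge_evaluation   # project module providing integrate_information


def process_row(knowledge_base, row):
    fit = parsing.get_fit(knowledge_base, row)
    if fit is None:
        return None                      # no registered structure matched
    tokens, inferred_tree = fit          # "decomposition" is no longer returned
    return knowledge_evaluation.integrate_information(
        knowledge_base.knowledge,
        {
            "elements": tokens,
            "parsed": inferred_tree,
        })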

View File

@@ -7,7 +7,7 @@ import copy
 from functools import reduce
 from typing import List
 
-MAX_RECURSIONS = 10
+MAX_RECURSIONS = 5
 
 # TODO: more flexible tokenization
 def to_tokens(text):
@@ -155,7 +155,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
         matcher.pop(i)
         tokens.pop(i)
 
-    possible_remixes = get_possible_remixes(matcher, similar_matcher)
+    possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
     chosen_remix = possible_remixes[0]
 
     # print("New tokens:", tokens)
@@ -163,15 +163,21 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
     return chosen_remix, (start_bounds, end_bounds)
 
 
-def get_possible_remixes(matcher, similar_matcher):
+def get_possible_remixes(knowledge_base, matcher, similar_matcher):
     # print("*" * 20)
     # print(matcher)
     # print(similar_matcher)
 
     matrix = []
     for element in matcher:
-        assert(element in similar_matcher)
-        indexes = all_indexes(similar_matcher, element)
+        print("-", element)
+        print("+", similar_matcher)
+        assert(element in similar_matcher or isinstance(element, dict))
+
+        if isinstance(element, dict):
+            indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
+        else:
+            indexes = all_indexes(similar_matcher, element)
         matrix.append(indexes)
 
     # print(matrix)
@@ -192,6 +198,24 @@ def all_indexes(collection, element):
     return indexes
 
 
+def all_matching_indexes(knowledge_base, collection, element):
+    indexes = []
+
+    assert("groups" in element)
+    element = element["groups"]
+
+    for i, instance in enumerate(collection):
+        if isinstance(instance, dict):
+            instance = instance["groups"]
+        elif instance in knowledge_base.knowledge:
+            instance = knowledge_base.knowledge[instance]["groups"]
+        intersection = set(instance) & set(element)
+        if len(intersection) > 0:
+            indexes.append((i, intersection))
+
+    return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
+
+
 def find_bounds(matcher, similar_matcher):
     start_bounds = []
     for i, element in enumerate(matcher):
@@ -314,27 +338,33 @@ def fitting_return_type(knowledge,
 
             print()
             while len(remaining_output) > 0:
-                ((input, output),
-                 (remaining_input, remaining_output)) = match_token(knowledge,
-                                                                    remaining_input,
-                                                                    remaining_output,
-                                                                    remaining_recursions - 1)
-                parsed_input += input
-                parsed_output += output
-                print(indent + "INP:", input)
-                print(indent + "OUT:", output)
+                for (elements,
+                     (remaining_input,
+                      remaining_output)) in match_token(knowledge,
+                                                        remaining_input,
+                                                        remaining_output,
+                                                        remaining_recursions - 1):
+                    parsed_input += elements
+                    print(indent + "Elements:", elements)
+                    break
 
             print(indent + "Pi:", parsed_input)
             print(indent + "Po:", parsed_output)
-            print("\x1b[7m", end='')
-            print(indent + "Ri:", remaining_input)
-            print(indent + "Ro:", remaining_output)
-            print("\x1b[0m")
-            return ((parsed_input, parsed_output),
-                    (remaining_input, remaining_output + tail_of_ouput_stream))
+            print("\x1b[7m" + indent + "Ri:",
+                  remaining_input,
+                  "\x1b[0m]")
+            print("\x1b[7m" + indent + "Ro:",
+                  remaining_output + tail_of_ouput_stream,
+                  "\x1b[0m]")
+            print()
+            yield (parsed_input,
+                   (remaining_input, remaining_output + tail_of_ouput_stream))
         except TypeError as e:
             print(indent + "X " + str(e))
             pass
+        except ValueError as e:
+            print(indent + "X " + str(e))
+            pass
         except IndexError as e:
             print(indent + "X " + str(e))
             pass
@@ -353,7 +383,7 @@ def match_token(knowledge,
                 trained: List[str],
                 remaining_recursions: int):
     if remaining_recursions < 1:
-        return None
+        yield None
 
     # print("#" * (MAX_RECURSIONS - remaining_recursions))
     # print("Input:", input)
@@ -366,18 +396,60 @@ def match_token(knowledge,
 
     if isinstance(expected_first, dict):
         # TODO: check if the dictionary matches the values
-        return (([first_input], [expected_first]), (input[1:], trained[1:]))
+        yield (([first_input]), (input[1:], trained[1:]))
 
     elif isinstance(expected_first, tuple):
         return_type, remixer = expected_first
-        return fitting_return_type(knowledge,
-                                   return_type, remixer,
-                                   input, trained[1:],
-                                   remaining_recursions)
+        for r in fitting_return_type(knowledge,
+                                     return_type, remixer,
+                                     input, trained[1:],
+                                     remaining_recursions):
+            print("-->", r)
+            yield r
 
     elif expected_first == first_input:
-        return (([first_input], [expected_first]), (input[1:], trained[1:]))
+        yield (([first_input]), (input[1:], trained[1:]))
+
+    yield None
+
+
+def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions):
+    indent = "." + " " * (MAX_RECURSIONS - remaining_recursions)
+    try:
+        # TODO: merge with get_return type, as uses the same mechanism
+        if len(remaining_output) > 0:
+            for (elements,
+                 (input_for_next_level,
+                  output_for_next_level)) in match_token(knowledge,
+                                                         remaining_input,
+                                                         remaining_output,
+                                                         remaining_recursions):
+                print("Nli:", input_for_next_level)
+                print("Nlo:", output_for_next_level)
+                print(indent + "E", elements)
+
+                try:
+                    result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
+                    print(indent + "", result)
+                    lower_elements, _ = result
+                    print("<<<<< ELM:", elements, lower_elements)
+                    return elements + lower_elements, ast
+                except TypeError as e:
+                    print(indent + "X " + str(e))
+                except IndexError as e:
+                    print(indent + "X " + str(e))
+            else:
+                print(indent + "Ri:", remaining_input)
+                print(indent + "Ro:", remaining_output)
+                print("OK")
+        elif len(remaining_input) == 0 and len(remaining_input) == 0:
+            print("<<<<< AST:", ast)
+            return [], ast
+    except TypeError as e:
+        print(indent + "X " + str(e))
+    except IndexError as e:
+        print(indent + "X " + str(e))
 
     return None
@@ -392,25 +464,14 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS):
             remaining_output = copy.deepcopy(sample)
             print(indent + "AST:", ast)
             print(indent + "S:", sample)
-
-            # TODO: merge with get_return type, as uses the same mechanism
-            while len(remaining_output) > 0:
-                ((_, _), (remaining_input, remaining_output)) = match_token(knowledge,
-                                                                            remaining_input,
-                                                                            remaining_output,
-                                                                            remaining_recursions)
-                print(indent + "Ri:", remaining_input)
-                print(indent + "Ro:", remaining_output)
-
-            if len(remaining_input) == 0 and len(remaining_input) == 0:
-                print("!!!", tokens, sample, ast)
-                return tokens, sample, ast
+            result = get_fit_onwards(knowledge, ast, remaining_input,
+                                     remaining_output, remaining_recursions)
+            if result is not None:
+                return result
         except TypeError as e:
             print(indent + "X " + str(e))
-            pass
         except IndexError as e:
             print(indent + "X " + str(e))
-            pass
         print()
     else:
         return None
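
The new all_matching_indexes helper is what lets a dict element (one carrying a "groups" set) match candidates by shared group membership, ranked by overlap size. A standalone sketch of that idea with data mirroring the test knowledge; this is an illustration, not the module itself, and the skip of unknown tokens is a sketch-only choice:

# Standalone illustration of group-intersection ranking (not the module itself).
knowledge = {
    'earth': {"groups": {'noun', 'object', 'planet'}},
    'io':    {"groups": {'noun', 'object'}},
    'moon':  {"groups": {'noun', 'group'}},
}


def matching_indexes(knowledge, collection, element):
    # Return indexes of `collection` whose groups overlap element["groups"],
    # largest overlap first (same sort key as all_matching_indexes).
    wanted = set(element["groups"])
    scored = []
    for i, instance in enumerate(collection):
        if isinstance(instance, dict):
            groups = set(instance["groups"])
        elif instance in knowledge:
            groups = set(knowledge[instance]["groups"])
        else:
            continue                      # unknown token: nothing to compare
        overlap = groups & wanted
        if overlap:
            scored.append((i, overlap))
    return [i for i, overlap in sorted(scored, key=lambda s: len(s[1]), reverse=True)]


print(matching_indexes(knowledge, ['earth', 'io', 'moon'],
                       {"groups": {'noun', 'object', 'planet'}}))
# -> [0, 1, 2]: 'earth' shares three groups, 'io' two, 'moon' one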

View File

@@ -16,30 +16,34 @@ examples = [
         "text": "lava is dangerous",
         "parsed": ("exists-property-with-value", 'lava', 'dangerous')
     },
-    # {
-    #     "text": "is lava dangerous?",
-    #     "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
-    # },
+    {
+        "text": "is lava dangerous?",
+        "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
+    },
     {
         "text": "earth is a planet",
         "parsed": ("pertenence-to-group", 'earth', 'planet'),
     },
-    # {
-    #     "text": "is earth a moon?",
-    #     "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
-    # },
-    # {
-    #     "text": "Green is a color",
-    #     "parsed": ("pertenence-to-group", 'green', 'color'),
-    # },
-    # {
-    #     "text": "a plane can fly",
-    #     "parsed": ("has-capacity", 'plane', 'fly')
-    # },
-    # {
-    #     "text": "a wale can swim",
-    #     "parsed": ("has-capacity", 'wale', 'swim')
-    # },
+    {
+        "text": "io is a moon",
+        "parsed": ("pertenence-to-group", 'io', 'moon'),
+    },
+    {
+        "text": "is earth a moon?",
+        "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
+    },
+    {
+        "text": "Green is a color",
+        "parsed": ("pertenence-to-group", 'green', 'color'),
+    },
+    {
+        "text": "a plane can fly",
+        "parsed": ("has-capacity", 'plane', 'fly')
+    },
+    {
+        "text": "a wale can swim",
+        "parsed": ("has-capacity", 'wale', 'swim')
+    },
 ]
@@ -52,6 +56,9 @@ base_knowledge = {
     'earth': {
         "groups": set(['noun', 'object', 'planet']),
     },
+    'io': {
+        "groups": set(['noun', 'object']),
+    },
     'green': {
         "groups": set(['noun', 'color', 'concept']),
     },
@@ -75,6 +82,9 @@ base_knowledge = {
     'planet': {
         "groups": set(['noun', 'group']),
    },
+    'moon': {
+        "groups": set(['noun', 'group']),
+    },
     'color': {
         "groups": set(['property', 'group']),
     },
@@ -121,10 +131,17 @@ def main():
     # print('-----')
     # print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
     # print('-----')
-    # queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) }
-    # queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) }
+    queryTrue = {
+        "text": "is io a moon?",
+        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
+    }
+    queryFalse = {
+        "text": "is io a planet?",
+        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
+    }
 
-    # test_assumption(False, knowledge, queryFalse)
+    test_assumption(False, knowledge, queryFalse)
+    test_assumption(True, knowledge, queryTrue)
 
 if __name__ == '__main__':
     main()
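
The new 'io' and 'moon' knowledge entries, together with the "io is a moon" example, are what allow queryTrue to be answered positively and queryFalse negatively. A rough sketch of the group-membership reasoning those two queries rely on; how the project actually stores a learned "pertenence-to-group" fact is an assumption here, not taken from the commit:

# Rough sketch only: storing a learned "pertenence-to-group" fact as a group
# tag on the member is an assumption, not the project's implementation.
base = {
    'io':   {"groups": {'noun', 'object'}},
    'moon': {"groups": {'noun', 'group'}},
}


def learn_membership(knowledge, member, group):
    # Hypothetical effect of training on "io is a moon".
    knowledge[member]["groups"].add(group)


def is_in_group(knowledge, member, group):
    return group in knowledge[member]["groups"]


learn_membership(base, 'io', 'moon')
print(is_in_group(base, 'io', 'moon'))    # True  -> what queryTrue expects
print(is_in_group(base, 'io', 'planet'))  # False -> what queryFalse expects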