Fix exploration of multiple options on a sub-level.

This commit is contained in:
kenkeiras 2017-05-17 00:27:23 +02:00
parent ce7b50e023
commit 63227c4f3e
3 changed files with 144 additions and 67 deletions

View File

@ -49,11 +49,10 @@ class KnowledgeBase(object):
def process(self, row):
knowledge_before = copy.deepcopy(self.knowledge)
print("\x1b[7;32m> {} \x1b[0m".format(row))
tokens, decomposition, inferred_tree = parsing.get_fit(self, row)
tokens, inferred_tree = parsing.get_fit(self, row)
result = knowledge_evaluation.integrate_information(self.knowledge,
{
"elements": tokens,
"decomposition": decomposition,
"parsed": inferred_tree,
})
self.act_upon(result)

View File

@ -7,7 +7,7 @@ import copy
from functools import reduce
from typing import List
MAX_RECURSIONS = 10
MAX_RECURSIONS = 5
# TODO: more flexible tokenization
def to_tokens(text):
@ -155,7 +155,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
matcher.pop(i)
tokens.pop(i)
possible_remixes = get_possible_remixes(matcher, similar_matcher)
possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
chosen_remix = possible_remixes[0]
# print("New tokens:", tokens)
@ -163,15 +163,21 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
return chosen_remix, (start_bounds, end_bounds)
def get_possible_remixes(matcher, similar_matcher):
def get_possible_remixes(knowledge_base, matcher, similar_matcher):
# print("*" * 20)
# print(matcher)
# print(similar_matcher)
matrix = []
for element in matcher:
assert(element in similar_matcher)
indexes = all_indexes(similar_matcher, element)
print("-", element)
print("+", similar_matcher)
assert(element in similar_matcher or isinstance(element, dict))
if isinstance(element, dict):
indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
else:
indexes = all_indexes(similar_matcher, element)
matrix.append(indexes)
# print(matrix)
@ -192,6 +198,24 @@ def all_indexes(collection, element):
return indexes
def all_matching_indexes(knowledge_base, collection, element):
    """Return indexes of `collection` entries whose "groups" overlap `element`'s.

    Each entry is resolved to its group set: dict entries carry their own
    "groups"; known atoms are looked up in knowledge_base.knowledge; anything
    else is used as-is. Indexes are ordered by overlap size, largest first
    (ties keep collection order, as sorting is stable).
    """
    assert "groups" in element
    target_groups = set(element["groups"])

    scored = []
    for position, candidate in enumerate(collection):
        if isinstance(candidate, dict):
            groups = candidate["groups"]
        elif candidate in knowledge_base.knowledge:
            groups = knowledge_base.knowledge[candidate]["groups"]
        else:
            # Unknown atom: fall back to treating the raw value as its groups.
            groups = candidate
        overlap = set(groups) & target_groups
        if overlap:
            scored.append((position, overlap))

    scored.sort(key=lambda pair: len(pair[1]), reverse=True)
    return [position for position, _ in scored]
def find_bounds(matcher, similar_matcher):
start_bounds = []
for i, element in enumerate(matcher):
@ -314,27 +338,33 @@ def fitting_return_type(knowledge,
print()
while len(remaining_output) > 0:
((input, output),
(remaining_input, remaining_output)) = match_token(knowledge,
remaining_input,
remaining_output,
remaining_recursions - 1)
parsed_input += input
parsed_output += output
print(indent + "INP:", input)
print(indent + "OUT:", output)
for (elements,
(remaining_input,
remaining_output)) in match_token(knowledge,
remaining_input,
remaining_output,
remaining_recursions - 1):
parsed_input += elements
print(indent + "Elements:", elements)
break
print(indent + "Pi:", parsed_input)
print(indent + "Po:", parsed_output)
print("\x1b[7m", end='')
print(indent + "Ri:", remaining_input)
print(indent + "Ro:", remaining_output)
print("\x1b[0m")
return ((parsed_input, parsed_output),
(remaining_input, remaining_output + tail_of_ouput_stream))
print("\x1b[7m" + indent + "Ri:",
remaining_input,
"\x1b[0m]")
print("\x1b[7m" + indent + "Ro:",
remaining_output + tail_of_ouput_stream,
"\x1b[0m]")
print()
yield (parsed_input,
(remaining_input, remaining_output + tail_of_ouput_stream))
except TypeError as e:
print(indent + "X " + str(e))
pass
except ValueError as e:
print(indent + "X " + str(e))
pass
except IndexError as e:
print(indent + "X " + str(e))
pass
@ -353,7 +383,7 @@ def match_token(knowledge,
trained: List[str],
remaining_recursions: int):
if remaining_recursions < 1:
return None
yield None
# print("#" * (MAX_RECURSIONS - remaining_recursions))
# print("Input:", input)
@ -366,18 +396,60 @@ def match_token(knowledge,
if isinstance(expected_first, dict):
# TODO: check if the dictionary matches the values
return (([first_input], [expected_first]), (input[1:], trained[1:]))
yield (([first_input]), (input[1:], trained[1:]))
elif isinstance(expected_first, tuple):
return_type, remixer = expected_first
return fitting_return_type(knowledge,
return_type, remixer,
input, trained[1:],
remaining_recursions)
for r in fitting_return_type(knowledge,
return_type, remixer,
input, trained[1:],
remaining_recursions):
print("-->", r)
yield r
elif expected_first == first_input:
return (([first_input], [expected_first]), (input[1:], trained[1:]))
yield (([first_input]), (input[1:], trained[1:]))
yield None
def get_fit_onwards(knowledge, ast, remaining_input, remaining_output, remaining_recursions):
    """Recursively consume the token streams until one complete fit of `ast` is found.

    Tries every alternative yielded by match_token() for the current
    (remaining_input, remaining_output) pair and recurses on the remainder;
    the first branch that consumes both streams completely wins.

    Returns (matched_elements, ast) on success, or None when no branch fits.
    """
    indent = "." + " " * (MAX_RECURSIONS - remaining_recursions)
    try:
        # TODO: merge with get_return type, as uses the same mechanism
        if len(remaining_output) > 0:
            # Explore each candidate decomposition offered by match_token.
            for (elements,
                 (input_for_next_level,
                  output_for_next_level)) in match_token(knowledge,
                                                         remaining_input,
                                                         remaining_output,
                                                         remaining_recursions):
                print("Nli:", input_for_next_level)
                print("Nlo:", output_for_next_level)
                print(indent + "E", elements)
                try:
                    # NOTE(review): remaining_recursions is passed unchanged here —
                    # termination relies on match_token shrinking its inputs; confirm.
                    result = get_fit_onwards(knowledge, ast, input_for_next_level, output_for_next_level, remaining_recursions)
                    print(indent + "", result)
                    # result is None when the branch failed; unpacking then raises
                    # TypeError, which falls through to the next alternative.
                    lower_elements, _ = result
                    print("<<<<< ELM:", elements, lower_elements)
                    return elements + lower_elements, ast
                except TypeError as e:
                    print(indent + "X " + str(e))
                except IndexError as e:
                    print(indent + "X " + str(e))
            else:
                # for-else: every alternative was exhausted without a fit.
                print(indent + "Ri:", remaining_input)
                print(indent + "Ro:", remaining_output)
                print("OK")
        elif len(remaining_input) == 0 and len(remaining_output) == 0:
            # Fix: the original tested len(remaining_input) twice. The intent —
            # both streams fully consumed — is unambiguous, and in this branch
            # len(remaining_output) is already 0, so behavior is unchanged.
            print("<<<<< AST:", ast)
            return [], ast
    except TypeError as e:
        print(indent + "X " + str(e))
    except IndexError as e:
        print(indent + "X " + str(e))
    return None
@ -392,25 +464,14 @@ def get_fit(knowledge, row, remaining_recursions=MAX_RECURSIONS):
remaining_output = copy.deepcopy(sample)
print(indent + "AST:", ast)
print(indent + "S:", sample)
# TODO: merge with get_return type, as uses the same mechanism
while len(remaining_output) > 0:
((_, _), (remaining_input, remaining_output)) = match_token(knowledge,
remaining_input,
remaining_output,
remaining_recursions)
print(indent + "Ri:", remaining_input)
print(indent + "Ro:", remaining_output)
if len(remaining_input) == 0 and len(remaining_input) == 0:
print("!!!", tokens, sample, ast)
return tokens, sample, ast
result = get_fit_onwards(knowledge, ast, remaining_input,
remaining_output, remaining_recursions)
if result is not None:
return result
except TypeError as e:
print(indent + "X " + str(e))
pass
except IndexError as e:
print(indent + "X " + str(e))
pass
print()
else:
return None

View File

@ -16,30 +16,34 @@ examples = [
"text": "lava is dangerous",
"parsed": ("exists-property-with-value", 'lava', 'dangerous')
},
# {
# "text": "is lava dangerous?",
# "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
# },
{
"text": "is lava dangerous?",
"parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
},
{
"text": "earth is a planet",
"parsed": ("pertenence-to-group", 'earth', 'planet'),
},
# {
# "text": "is earth a moon?",
# "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
# },
# {
# "text": "Green is a color",
# "parsed": ("pertenence-to-group", 'green', 'color'),
# },
# {
# "text": "a plane can fly",
# "parsed": ("has-capacity", 'plane', 'fly')
# },
# {
# "text": "a wale can swim",
# "parsed": ("has-capacity", 'wale', 'swim')
# },
{
"text": "io is a moon",
"parsed": ("pertenence-to-group", 'io', 'moon'),
},
{
"text": "is earth a moon?",
"parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
},
{
"text": "Green is a color",
"parsed": ("pertenence-to-group", 'green', 'color'),
},
{
"text": "a plane can fly",
"parsed": ("has-capacity", 'plane', 'fly')
},
{
"text": "a wale can swim",
"parsed": ("has-capacity", 'wale', 'swim')
},
]
base_knowledge = {
@ -52,6 +56,9 @@ base_knowledge = {
'earth': {
"groups": set(['noun', 'object', 'planet']),
},
'io': {
"groups": set(['noun', 'object']),
},
'green': {
"groups": set(['noun', 'color', 'concept']),
},
@ -75,6 +82,9 @@ base_knowledge = {
'planet': {
"groups": set(['noun', 'group']),
},
'moon': {
"groups": set(['noun', 'group']),
},
'color': {
"groups": set(['property', 'group']),
},
@ -121,10 +131,17 @@ def main():
# print('-----')
# print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
# print('-----')
# queryTrue = { "text": "is io a moon?", "parsed": ("question", ("pertenence-to-group", "io", "moon")) }
# queryFalse = { "text": "is io a planet?", "parsed": ("question", ("pertenence-to-group", "io", "planet")) }
queryTrue = {
"text": "is io a moon?",
"parsed": ("question", ("pertenence-to-group", "io", "moon"))
}
queryFalse = {
"text": "is io a planet?",
"parsed": ("question", ("pertenence-to-group", "io", "planet"))
}
# test_assumption(False, knowledge, queryFalse)
test_assumption(False, knowledge, queryFalse)
test_assumption(True, knowledge, queryTrue)
if __name__ == '__main__':
main()