Increase exploration, remove unnecessary initial knowledge.

kenkeiras committed 2017-10-02 23:37:20 +02:00
commit 75174e1736
parent a0810dd0e8
2 changed files with 106 additions and 96 deletions

View File

@@ -28,7 +28,7 @@ def make_template(knowledge_base, tokens, parsed):
         if word in template:
             template[template.index(word)] = i
             matcher[i] = {
-                'groups': set(knowledge_base.knowledge[word]['groups'])
+                'groups': set(knowledge_base.knowledge.get(word, {}).get('groups', set())),
             }

     return tokens, matcher, template
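
Note: the new lookup tolerates words that have no entry in the knowledge base (several hard-coded entries are dropped further down in this commit), falling back to an empty group set instead of raising a KeyError. A minimal standalone sketch of the pattern, using a plain dict in place of knowledge_base.knowledge:

    # Chained dict.get() calls fall back to an empty set for unknown words,
    # where knowledge[word]['groups'] would raise KeyError.
    knowledge = {'summer': {'groups': {'epoch'}}}

    known = set(knowledge.get('summer', {}).get('groups', set()))      # {'epoch'}
    unknown = set(knowledge.get('icecream', {}).get('groups', set()))  # set()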
@@ -98,8 +98,15 @@ def integrate_language(knowledge_base, example):
     for position, atom in lower_levels:
         with session().log("Atom {}".format(atom)):
-            similar = get_similar_tree(knowledge_base, atom, tokens)
-            remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
+            similars = get_similar_tree(knowledge_base, atom, tokens)
+            for similar in similars:
+                result = build_remix_matrix(knowledge_base, tokens, atom, similar)
+                if result is not None:
+                    break
+            if result is None:
+                raise Exception("No match found")
+            remix, (start_bounds, end_bounds) = result
             after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)

             session().annotate("--FIND MIX--")
@@ -161,6 +168,7 @@ def apply_remix(tokens, remix):

 def build_remix_matrix(knowledge_base, tokens, atom, similar):
     tokens = list(tokens)
+    with session().log("Remix matrix for {} - {}".format(tokens, atom)):
     tokens, matcher, result = make_template(knowledge_base, tokens, atom)
     similar_matcher, similar_result, similar_result_resolved, _, _ = similar
@@ -171,6 +179,10 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
             tokens.pop(i)

     possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
+    session().annotate("Possible remixes: {}".format(possible_remixes))
+    if len(possible_remixes) < 1:
+        return None
+
     chosen_remix = possible_remixes[0]

     return chosen_remix, (start_bounds, end_bounds)
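
Note: build_remix_matrix now reports "no usable remix" by returning None instead of crashing with an IndexError on possible_remixes[0], which is what lets the exploration loop above move on to the next candidate. A tiny sketch of the guard:

    # An empty candidate list becomes an explicit None instead of an IndexError.
    def choose_first(possible_remixes):
        if len(possible_remixes) < 1:
            return None
        return possible_remixes[0]

    assert choose_first([]) is None
    assert choose_first([('remix', 1)]) == ('remix', 1)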
@@ -179,16 +191,20 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):

 def get_possible_remixes(knowledge_base, matcher, similar_matcher):
     matrix = []
+    with session().log("Possible remixes from matcher: {}".format(matcher)):
     for element in matcher:
-        session().annotate("- {}".format(element))
-        session().annotate("+ {}".format(similar_matcher))
+        with session().log("Element `{}`".format(element)):
+            session().annotate("Similar `{}`".format(similar_matcher))
         if element in similar_matcher or isinstance(element, dict):
             if isinstance(element, dict):
                 indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
+                session().annotate("Dict element matching: {}".format(indexes))
             else:
                 indexes = all_indexes(similar_matcher, element)
+                session().annotate("* element matching: {}".format(indexes))
             matrix.append(indexes)
         else:
+            session().annotate("`else` element matching: [element]")
             matrix.append([element])

     # TODO: do some scoring to find the most "interesting combination"
@@ -298,12 +314,14 @@ def get_similar_tree(knowledge_base, atom, tokens):
         return None

     for i, possibility in enumerate(sorted_possibilities):
-        similar_matcher, similar_result, similar_result_resolved, _, _ = possibility
+        similar_matcher, similar_result, similar_result_resolved, _atom_score, _token_score = possibility
         with session().log("Like {}".format(similar_matcher)):
-            session().annotate('Results on: {}'.format(similar_result_resolved))
             session().annotate('AST: {}'.format(similar_result))
+            session().annotate('Results on: {}'.format(similar_result_resolved))
+            session().annotate('Atom score: {}'.format(_atom_score))
+            session().annotate('Token score: {}'.format(_token_score))

-    return sorted_possibilities[0]
+    return sorted_possibilities

     # TODO: unroll this mess
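
Note: get_similar_tree now hands back the whole ranked list instead of only sorted_possibilities[0], which is the core of the "increase exploration" part of this commit: weaker matches stay available as fallbacks for the loop in integrate_language. A sketch of the change in contract (the tuples and the ranking key are illustrative, not the project's real scoring):

    # Each possibility: (matcher, ast, resolved, atom_score, token_score)
    possibilities = [
        (('m2',), 'ast2', 'res2', 2, 4),
        (('m1',), 'ast1', 'res1', 3, 5),
        (('m3',), 'ast3', 'res3', 1, 1),
    ]
    sorted_possibilities = sorted(possibilities,
                                  key=lambda p: (p[3], p[4]), reverse=True)

    best_only = sorted_possibilities[0]   # old contract: a single candidate
    ranked_all = sorted_possibilities     # new contract: every candidate, best first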
@@ -375,14 +393,14 @@ def reverse_remix(tree_section, remix):

 def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
     results = []
     for matcher, ast in knowledge.trained:
+        with session().log("{} <- {}".format(matcher, tokens)):
         result = match_fit(knowledge, tokens, matcher, ast,
                            remaining_recursions)

         if result is not None:
+            with session().log("Result: {}".format(result)):
             results.append(result)
-            session().annotate("XXX {}".format(result))

-    session().annotate(' - ' + '\n - '.join(map(str, results)))
     if len(results) > 0:
         return results[0]
@@ -407,6 +425,7 @@ def resolve_fit(knowledge, fit, remaining_recursions):
         if is_definite_minisegment(element):
             fitted.append(element)
         else:
+            with session().log("Resolving fit of `{}`".format(element)):
             ((result_type, remixer), tokens) = element
             remixed_tokens = reverse_remix(tokens, remixer)
             if remixed_tokens is None:
@@ -430,8 +449,8 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
     session().annotate(indent + 'T> {}'.format(tokens))
     session().annotate(indent + 'M> {}'.format(matcher))
     for minisegment in matcher:
+        with session().log("Minisegment `{}`".format(minisegment)):
         possibilities_after_round = []
-        session().annotate(indent + "MS {}".format(minisegment))
         for matched_tokens, remaining_tokens in segment_possibilities:
             if len(remaining_tokens) < 1:
                 continue
@@ -456,7 +475,12 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
             session().annotate(indent + "## PA {}".format(possibilities_after_round))
         else:
             segment_possibilities = possibilities_after_round
-            session().annotate(">>>> {}".format(len(segment_possibilities)))
+            for possibility in segment_possibilities:
+                with session().log("Possibility: `{}`".format(possibility)):
+                    pass
+            if len(segment_possibilities) < 1:
+                with session().log("NO POSSIBLE"):
+                    pass

     fully_matched_segments = [(matched, remaining)
                               for (matched, remaining)
@@ -464,15 +488,19 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
                               if len(remaining) == 0]

     resolved_fits = []
+    with session().log("Full matches"):
     for fit, _ in fully_matched_segments:
-        session().annotate(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+        with session().log(fit):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+            pass

-    session().annotate(indent + '*' * 20)
+    with session().log("Resolutions"):
     for fit, _ in fully_matched_segments:
-        session().annotate(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+        with session().log("Resolving {}".format(fit)):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
         resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
         if resolved_fit is not None:
             resolved_fits.append(resolved_fit)
+        else:
+            session().annotate("Not resolved")

     if len(resolved_fits) == 0:
         return None
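
Note: the changes in this file largely replace flat session().annotate() calls with nested session().log() context managers, so the trace follows the shape of the matching recursion instead of being a flat stream. The diff only shows the session API being used (log() as a context manager, annotate() for single lines); purely as an illustration of that pattern, and not the project's actual implementation, a tree logger with the same surface could look like this:

    from contextlib import contextmanager

    class _Session:
        """Hypothetical stand-in: log() opens an indented scope, annotate() writes one line."""
        def __init__(self):
            self.depth = 0

        @contextmanager
        def log(self, message):
            print('  ' * self.depth + str(message))
            self.depth += 1
            try:
                yield
            finally:
                self.depth -= 1

        def annotate(self, message):
            print('  ' * self.depth + str(message))

    _SESSION = _Session()

    def session():
        return _SESSION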

View File

@@ -655,24 +655,9 @@ examples = [
 ]

 base_knowledge = {
-    'icecream': {
-        "groups": {'noun', 'object', 'comestible', 'sweet'},
-    },
-    'hot': {
-        "groups": {'property', 'temperature'},
-    },
     'summer': {
         "groups": {'epoch'},
     },
-    'planet': {
-        "groups": {'noun', 'group'},
-    },
-    'green': {
-        "groups": {'noun', 'color', 'concept'},
-    },
-    'milk': {
-        "groups": {'noun'},
-    },
     'fly': {
         "groups": {'verb'},
     },
@@ -682,9 +667,6 @@ base_knowledge = {
     'electricity': {
         "groups": {'power'},
     },
-    'french': {
-        "groups": {'language'},
-    }
 }

 def main():
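
Note: dropping 'icecream', 'hot', 'planet', 'green', 'milk' and 'french' means their groups are no longer given up front; combined with the defensive lookup in the first hunk, an unknown word simply starts with an empty group set and its grouping has to be picked up from the training examples instead. A small sketch of that interaction (only the entries visible in this diff are listed; the real base_knowledge keeps more):

    base_knowledge = {
        'summer': {"groups": {'epoch'}},
        'fly': {"groups": {'verb'}},
        'electricity': {"groups": {'power'}},
    }

    for word in ('summer', 'icecream', 'french'):
        groups = set(base_knowledge.get(word, {}).get('groups', set()))
        print(word, groups)   # 'icecream' and 'french' now come back empty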