Increase exploration, remove unnecessary initial knowledge.
commit 75174e1736 (parent a0810dd0e8)
@@ -28,7 +28,7 @@ def make_template(knowledge_base, tokens, parsed):
         if word in template:
             template[template.index(word)] = i
         matcher[i] = {
-            'groups': set(knowledge_base.knowledge[word]['groups'])
+            'groups': set(knowledge_base.knowledge.get(word, {}).get('groups', set())),
         }
     return tokens, matcher, template

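Note: the new 'groups' lookup tolerates words that have no entry in the knowledge base yet, which matters once the seeded vocabulary below is trimmed. A minimal standalone sketch of that pattern (the `knowledge` dict here is illustrative, not the project's actual structure):

    # Hypothetical knowledge store; only 'summer' has been learned so far.
    knowledge = {
        'summer': {'groups': {'epoch'}},
    }

    word = 'icecream'

    # Old form: raises KeyError for words that were never seeded or learned.
    # groups = set(knowledge[word]['groups'])

    # New form: unknown words fall back to an empty group set,
    # so a template can still be built while the word is being learned.
    groups = set(knowledge.get(word, {}).get('groups', set()))
    assert groups == set()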
@@ -98,8 +98,15 @@ def integrate_language(knowledge_base, example):

     for position, atom in lower_levels:
         with session().log("Atom {}".format(atom)):
-            similar = get_similar_tree(knowledge_base, atom, tokens)
-            remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
+            similars = get_similar_tree(knowledge_base, atom, tokens)
+            for similar in similars:
+                result = build_remix_matrix(knowledge_base, tokens, atom, similar)
+                if result is not None:
+                    break
+
+            if result is None:
+                raise Exception("No match found")
+            remix, (start_bounds, end_bounds) = result

             after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
             session().annotate("--FIND MIX--")
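The integration step now walks every candidate returned by get_similar_tree instead of committing to the first one, and only fails after all of them are exhausted. A rough self-contained sketch of that control flow, with a stand-in candidate list and a dummy build step (all names here are illustrative, not the project's API):

    def first_buildable(candidates, build):
        """Try each candidate in order; return the first successful build."""
        result = None
        for candidate in candidates:
            result = build(candidate)
            if result is not None:
                break
        if result is None:
            raise Exception("No match found")
        return result

    # Toy usage: only the third candidate yields a usable remix.
    candidates = ['c1', 'c2', 'c3']
    build = lambda c: ('remix', (['start'], ['end'])) if c == 'c3' else None
    remix, (start_bounds, end_bounds) = first_buildable(candidates, build)
    assert remix == 'remix'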
@@ -161,6 +168,7 @@ def apply_remix(tokens, remix):

 def build_remix_matrix(knowledge_base, tokens, atom, similar):
     tokens = list(tokens)
+    with session().log("Remix matrix for {} - {}".format(tokens, atom)):
         tokens, matcher, result = make_template(knowledge_base, tokens, atom)
         similar_matcher, similar_result, similar_result_resolved, _, _ = similar

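Most hunks in this commit swap flat session().annotate calls for nested with session().log(...) blocks. A self-contained sketch of how a context-manager based, indentation-aware logger of that shape can work; this is only an illustration under assumed semantics, not the project's actual session module:

    import contextlib

    class Session:
        def __init__(self):
            self.depth = 0

        @contextlib.contextmanager
        def log(self, message):
            # Entering a block prints its message and indents everything inside it.
            print('  ' * self.depth + str(message))
            self.depth += 1
            try:
                yield
            finally:
                self.depth -= 1

        def annotate(self, message):
            # One-off annotation at the current nesting depth.
            print('  ' * self.depth + str(message))

    _SESSION = Session()

    def session():
        return _SESSION

    # Usage mirroring the pattern in the diff above.
    with session().log("Remix matrix for {} - {}".format(['tokens'], 'atom')):
        session().annotate("Possible remixes: []")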
@@ -171,6 +179,10 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
             tokens.pop(i)

         possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
+        session().annotate("Possible remixes: {}".format(possible_remixes))
+        if len(possible_remixes) < 1:
+            return None
+
         chosen_remix = possible_remixes[0]

         return chosen_remix, (start_bounds, end_bounds)
@@ -179,16 +191,20 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar):
 def get_possible_remixes(knowledge_base, matcher, similar_matcher):

     matrix = []
+    with session().log("Possible remixes from matcher: {}".format(matcher)):
         for element in matcher:
-            session().annotate("- {}".format(element))
-            session().annotate("+ {}".format(similar_matcher))
+            with session().log("Element `{}`".format(element)):
+                session().annotate("Similar `{}`".format(similar_matcher))
                 if element in similar_matcher or isinstance(element, dict):
                     if isinstance(element, dict):
                         indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
+                        session().annotate("Dict element matching: {}".format(indexes))
                     else:
                         indexes = all_indexes(similar_matcher, element)
+                        session().annotate("* element matching: {}".format(indexes))
                     matrix.append(indexes)
                 else:
+                    session().annotate("`else` element matching: [element]")
                     matrix.append([element])

     # TODO: do some scoring to find the most "interesting combination"
@@ -298,12 +314,14 @@ def get_similar_tree(knowledge_base, atom, tokens):
         return None

     for i, possibility in enumerate(sorted_possibilities):
-        similar_matcher, similar_result, similar_result_resolved, _, _ = possibility
+        similar_matcher, similar_result, similar_result_resolved, _atom_score, _token_score = possibility
         with session().log("Like {}".format(similar_matcher)):
-            session().annotate('Results on: {}'.format(similar_result_resolved))
             session().annotate('AST: {}'.format(similar_result))
+            session().annotate('Results on: {}'.format(similar_result_resolved))
+            session().annotate('Atom score: {}'.format(_atom_score))
+            session().annotate('Token score: {}'.format(_token_score))

-    return sorted_possibilities[0]
+    return sorted_possibilities


 # TODO: unroll this mess
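get_similar_tree now hands back the whole sorted candidate list rather than only the top entry, which is what lets integrate_language keep exploring when the best candidate cannot be remixed. A toy sketch of that shape, assuming candidates are tuples ending in (atom_score, token_score) and that higher scores rank first (the scoring details are assumptions, not taken from the source):

    def rank_candidates(possibilities):
        """Return all candidates, best first, instead of just possibilities[0]."""
        return sorted(
            possibilities,
            key=lambda p: (p[3], p[4]),  # assumed (_atom_score, _token_score) ordering
            reverse=True,
        )

    # Toy candidates: (matcher, result, result_resolved, atom_score, token_score)
    possibilities = [
        ('m1', 'ast1', 'r1', 1, 2),
        ('m2', 'ast2', 'r2', 3, 1),
    ]
    ranked = rank_candidates(possibilities)
    assert ranked[0][0] == 'm2'   # best candidate first
    assert len(ranked) == 2       # the rest stay available for exploration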
@@ -375,14 +393,14 @@ def reverse_remix(tree_section, remix):
 def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
     results = []
     for matcher, ast in knowledge.trained:
+        with session().log("{} <- {}".format(matcher, tokens)):
             result = match_fit(knowledge, tokens, matcher, ast,
                                remaining_recursions)

             if result is not None:
+                with session().log("Result: {}".format(result)):
                     results.append(result)
-                    session().annotate("XXX {}".format(result))

-    session().annotate(' - ' + '\n - '.join(map(str, results)))
     if len(results) > 0:
         return results[0]

@@ -407,6 +425,7 @@ def resolve_fit(knowledge, fit, remaining_recursions):
         if is_definite_minisegment(element):
             fitted.append(element)
         else:
+            with session().log("Resolving fit of `{}`".format(element)):
                 ((result_type, remixer), tokens) = element
                 remixed_tokens = reverse_remix(tokens, remixer)
                 if remixed_tokens is None:
@@ -430,8 +449,8 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
     session().annotate(indent + 'T> {}'.format(tokens))
     session().annotate(indent + 'M> {}'.format(matcher))
     for minisegment in matcher:
+        with session().log("Minisegment `{}`".format(minisegment)):
             possibilities_after_round = []
-            session().annotate(indent + "MS {}".format(minisegment))
             for matched_tokens, remaining_tokens in segment_possibilities:
                 if len(remaining_tokens) < 1:
                     continue
@@ -456,7 +475,12 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
                 session().annotate(indent + "## PA {}".format(possibilities_after_round))
             else:
                 segment_possibilities = possibilities_after_round
-                session().annotate(">>>> {}".format(len(segment_possibilities)))
+                for possibility in segment_possibilities:
+                    with session().log("Possibility: `{}`".format(possibility)):
+                        pass
+                if len(segment_possibilities) < 1:
+                    with session().log("NO POSSIBLE"):
+                        pass

     fully_matched_segments = [(matched, remaining)
                               for (matched, remaining)
@@ -464,15 +488,19 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
                               if len(remaining) == 0]

     resolved_fits = []
+    with session().log("Full matches"):
         for fit, _ in fully_matched_segments:
-            session().annotate(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+            with session().log(fit):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+                pass

-    session().annotate(indent + '*' * 20)
+    with session().log("Resolutions"):
         for fit, _ in fully_matched_segments:
-            session().annotate(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+            with session().log("Resolving {}".format(fit)):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
                 resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
                 if resolved_fit is not None:
                     resolved_fits.append(resolved_fit)
+                else:
+                    session().annotate("Not resolved")

     if len(resolved_fits) == 0:
         return None
@@ -655,24 +655,9 @@ examples = [
 ]

 base_knowledge = {
-    'icecream': {
-        "groups": {'noun', 'object', 'comestible', 'sweet'},
-    },
-    'hot': {
-        "groups": {'property', 'temperature'},
-    },
     'summer': {
         "groups": {'epoch'},
     },
-    'planet': {
-        "groups": {'noun', 'group'},
-    },
-    'green': {
-        "groups": {'noun', 'color', 'concept'},
-    },
-    'milk': {
-        "groups": {'noun'},
-    },
     'fly': {
         "groups": {'verb'},
     },
@@ -682,9 +667,6 @@ base_knowledge = {
     'electricity': {
         "groups": {'power'},
     },
-    'french': {
-        "groups": {'language'},
-    }
 }

 def main():
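With entries such as 'icecream', 'hot', 'planet', 'green', 'milk' and 'french' dropped from base_knowledge, those words now start with no groups and have to be picked up from the training examples; the .get fallback added to make_template is what keeps that from raising KeyError. A small illustrative check against a simplified dictionary (not the full file):

    base_knowledge = {
        'summer': {"groups": {'epoch'}},
        'fly': {"groups": {'verb'}},
    }

    def groups_of(word):
        # Same defensive lookup as in make_template: unknown words yield an empty set.
        return set(base_knowledge.get(word, {}).get('groups', set()))

    assert groups_of('summer') == {'epoch'}
    assert groups_of('icecream') == set()   # no longer seeded; must be learned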