Increase exploration, remove unnecessary initial knowledge.
This commit is contained in:
parent
a0810dd0e8
commit
75174e1736
@ -28,7 +28,7 @@ def make_template(knowledge_base, tokens, parsed):
|
||||
if word in template:
|
||||
template[template.index(word)] = i
|
||||
matcher[i] = {
|
||||
'groups': set(knowledge_base.knowledge[word]['groups'])
|
||||
'groups': set(knowledge_base.knowledge.get(word, {}).get('groups', set())),
|
||||
}
|
||||
return tokens, matcher, template
|
||||
|
||||
@ -98,8 +98,15 @@ def integrate_language(knowledge_base, example):
|
||||
|
||||
for position, atom in lower_levels:
|
||||
with session().log("Atom {}".format(atom)):
|
||||
similar = get_similar_tree(knowledge_base, atom, tokens)
|
||||
remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
|
||||
similars = get_similar_tree(knowledge_base, atom, tokens)
|
||||
for similar in similars:
|
||||
result = build_remix_matrix(knowledge_base, tokens, atom, similar)
|
||||
if result is not None:
|
||||
break
|
||||
|
||||
if result is None:
|
||||
raise Exception("No match found")
|
||||
remix, (start_bounds, end_bounds) = result
|
||||
|
||||
after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
|
||||
session().annotate("--FIND MIX--")
|
||||
@ -161,38 +168,47 @@ def apply_remix(tokens, remix):
|
||||
|
||||
def build_remix_matrix(knowledge_base, tokens, atom, similar):
|
||||
tokens = list(tokens)
|
||||
tokens, matcher, result = make_template(knowledge_base, tokens, atom)
|
||||
similar_matcher, similar_result, similar_result_resolved, _, _ = similar
|
||||
with session().log("Remix matrix for {} - {}".format(tokens, atom)):
|
||||
tokens, matcher, result = make_template(knowledge_base, tokens, atom)
|
||||
similar_matcher, similar_result, similar_result_resolved, _, _ = similar
|
||||
|
||||
start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)
|
||||
start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)
|
||||
|
||||
for i, element in (end_bounds + start_bounds[::-1]):
|
||||
matcher.pop(i)
|
||||
tokens.pop(i)
|
||||
for i, element in (end_bounds + start_bounds[::-1]):
|
||||
matcher.pop(i)
|
||||
tokens.pop(i)
|
||||
|
||||
possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
|
||||
chosen_remix = possible_remixes[0]
|
||||
possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
|
||||
session().annotate("Possible remixes: {}".format(possible_remixes))
|
||||
if len(possible_remixes) < 1:
|
||||
return None
|
||||
|
||||
return chosen_remix, (start_bounds, end_bounds)
|
||||
chosen_remix = possible_remixes[0]
|
||||
|
||||
return chosen_remix, (start_bounds, end_bounds)
|
||||
|
||||
|
||||
def get_possible_remixes(knowledge_base, matcher, similar_matcher):
|
||||
|
||||
matrix = []
|
||||
for element in matcher:
|
||||
session().annotate("- {}".format(element))
|
||||
session().annotate("+ {}".format(similar_matcher))
|
||||
if element in similar_matcher or isinstance(element, dict):
|
||||
if isinstance(element, dict):
|
||||
indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
|
||||
else:
|
||||
indexes = all_indexes(similar_matcher, element)
|
||||
matrix.append(indexes)
|
||||
else:
|
||||
matrix.append([element])
|
||||
with session().log("Possible remixes from matcher: {}".format(matcher)):
|
||||
for element in matcher:
|
||||
with session().log("Element `{}`".format(element)):
|
||||
session().annotate("Similar `{}`".format(similar_matcher))
|
||||
if element in similar_matcher or isinstance(element, dict):
|
||||
if isinstance(element, dict):
|
||||
indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
|
||||
session().annotate("Dict element matching: {}".format(indexes))
|
||||
else:
|
||||
indexes = all_indexes(similar_matcher, element)
|
||||
session().annotate("* element matching: {}".format(indexes))
|
||||
matrix.append(indexes)
|
||||
else:
|
||||
session().annotate("`else` element matching: [element]")
|
||||
matrix.append([element])
|
||||
|
||||
# TODO: do some scoring to find the most "interesting combination"
|
||||
return [list(x) for x in list(zip(*matrix))]
|
||||
# TODO: do some scoring to find the most "interesting combination"
|
||||
return [list(x) for x in list(zip(*matrix))]
|
||||
|
||||
|
||||
def all_indexes(collection, element):
|
||||
@ -298,12 +314,14 @@ def get_similar_tree(knowledge_base, atom, tokens):
|
||||
return None
|
||||
|
||||
for i, possibility in enumerate(sorted_possibilities):
|
||||
similar_matcher, similar_result, similar_result_resolved, _, _ = possibility
|
||||
similar_matcher, similar_result, similar_result_resolved, _atom_score, _token_score = possibility
|
||||
with session().log("Like {}".format(similar_matcher)):
|
||||
session().annotate('Results on: {}'.format(similar_result_resolved))
|
||||
session().annotate('AST: {}'.format(similar_result))
|
||||
session().annotate('Results on: {}'.format(similar_result_resolved))
|
||||
session().annotate('Atom score: {}'.format(_atom_score))
|
||||
session().annotate('Token score: {}'.format(_token_score))
|
||||
|
||||
return sorted_possibilities[0]
|
||||
return sorted_possibilities
|
||||
|
||||
|
||||
# TODO: unroll this mess
|
||||
@ -375,14 +393,14 @@ def reverse_remix(tree_section, remix):
|
||||
def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
|
||||
results = []
|
||||
for matcher, ast in knowledge.trained:
|
||||
result = match_fit(knowledge, tokens, matcher, ast,
|
||||
remaining_recursions)
|
||||
with session().log("{} <- {}".format(matcher, tokens)):
|
||||
result = match_fit(knowledge, tokens, matcher, ast,
|
||||
remaining_recursions)
|
||||
|
||||
if result is not None:
|
||||
results.append(result)
|
||||
session().annotate("XXX {}".format(result))
|
||||
if result is not None:
|
||||
with session().log("Result: {}".format(result)):
|
||||
results.append(result)
|
||||
|
||||
session().annotate(' - ' + '\n - '.join(map(str, results)))
|
||||
if len(results) > 0:
|
||||
return results[0]
|
||||
|
||||
@ -407,19 +425,20 @@ def resolve_fit(knowledge, fit, remaining_recursions):
|
||||
if is_definite_minisegment(element):
|
||||
fitted.append(element)
|
||||
else:
|
||||
((result_type, remixer), tokens) = element
|
||||
remixed_tokens = reverse_remix(tokens, remixer)
|
||||
if remixed_tokens is None:
|
||||
return None
|
||||
with session().log("Resolving fit of `{}`".format(element)):
|
||||
((result_type, remixer), tokens) = element
|
||||
remixed_tokens = reverse_remix(tokens, remixer)
|
||||
if remixed_tokens is None:
|
||||
return None
|
||||
|
||||
minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
|
||||
if minifit is None:
|
||||
return None
|
||||
minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
|
||||
if minifit is None:
|
||||
return None
|
||||
|
||||
minitokens, miniast = minifit
|
||||
session().annotate(" AST | {}".format(miniast))
|
||||
subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
|
||||
fitted.append(subproperty)
|
||||
minitokens, miniast = minifit
|
||||
session().annotate(" AST | {}".format(miniast))
|
||||
subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
|
||||
fitted.append(subproperty)
|
||||
|
||||
return fitted
|
||||
|
||||
@ -430,33 +449,38 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
|
||||
session().annotate(indent + 'T> {}'.format(tokens))
|
||||
session().annotate(indent + 'M> {}'.format(matcher))
|
||||
for minisegment in matcher:
|
||||
possibilities_after_round = []
|
||||
session().annotate(indent + "MS {}".format(minisegment))
|
||||
for matched_tokens, remaining_tokens in segment_possibilities:
|
||||
if len(remaining_tokens) < 1:
|
||||
continue
|
||||
with session().log("Minisegment `{}`".format(minisegment)):
|
||||
possibilities_after_round = []
|
||||
for matched_tokens, remaining_tokens in segment_possibilities:
|
||||
if len(remaining_tokens) < 1:
|
||||
continue
|
||||
|
||||
session().annotate(indent + "RT {}".format(remaining_tokens[0]))
|
||||
session().annotate(indent + "DEF {}".format(is_definite_minisegment(minisegment)))
|
||||
if is_definite_minisegment(minisegment):
|
||||
# What if not match -----<
|
||||
if match_token(knowledge, remaining_tokens[0], minisegment):
|
||||
possibilities_after_round.append((
|
||||
matched_tokens + [remaining_tokens[0]],
|
||||
remaining_tokens[1:]
|
||||
))
|
||||
session().annotate(indent + "RT {}".format(remaining_tokens[0]))
|
||||
session().annotate(indent + "DEF {}".format(is_definite_minisegment(minisegment)))
|
||||
if is_definite_minisegment(minisegment):
|
||||
# What if not match -----<
|
||||
if match_token(knowledge, remaining_tokens[0], minisegment):
|
||||
possibilities_after_round.append((
|
||||
matched_tokens + [remaining_tokens[0]],
|
||||
remaining_tokens[1:]
|
||||
))
|
||||
else:
|
||||
# What if not match!!!!!!-----<
|
||||
# TODO: optimize this with a look ahead
|
||||
for i in range(1, len(tokens)):
|
||||
possibilities_after_round.append((
|
||||
matched_tokens + [(minisegment, remaining_tokens[:i])],
|
||||
remaining_tokens[i:]
|
||||
))
|
||||
session().annotate(indent + "## PA {}".format(possibilities_after_round))
|
||||
else:
|
||||
# What if not match!!!!!!-----<
|
||||
# TODO: optimize this with a look ahead
|
||||
for i in range(1, len(tokens)):
|
||||
possibilities_after_round.append((
|
||||
matched_tokens + [(minisegment, remaining_tokens[:i])],
|
||||
remaining_tokens[i:]
|
||||
))
|
||||
session().annotate(indent + "## PA {}".format(possibilities_after_round))
|
||||
else:
|
||||
segment_possibilities = possibilities_after_round
|
||||
session().annotate(">>>> {}".format(len(segment_possibilities)))
|
||||
segment_possibilities = possibilities_after_round
|
||||
for possibility in segment_possibilities:
|
||||
with session().log("Possibility: `{}`".format(possibility)):
|
||||
pass
|
||||
if len(segment_possibilities) < 1:
|
||||
with session().log("NO POSSIBLE"):
|
||||
pass
|
||||
|
||||
fully_matched_segments = [(matched, remaining)
|
||||
for (matched, remaining)
|
||||
@ -464,15 +488,19 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
|
||||
if len(remaining) == 0]
|
||||
|
||||
resolved_fits = []
|
||||
for fit, _ in fully_matched_segments:
|
||||
session().annotate(indent + "::: {}".format(fit)) # REMIXES HAVE TO BE APPLIED BEFORE!!!
|
||||
with session().log("Full matches"):
|
||||
for fit, _ in fully_matched_segments:
|
||||
with session().log(fit): # REMIXES HAVE TO BE APPLIED BEFORE!!!
|
||||
pass
|
||||
|
||||
session().annotate(indent + '*' * 20)
|
||||
for fit, _ in fully_matched_segments:
|
||||
session().annotate(indent + "::: {}".format(fit)) # REMIXES HAVE TO BE APPLIED BEFORE!!!
|
||||
resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
|
||||
if resolved_fit is not None:
|
||||
resolved_fits.append(resolved_fit)
|
||||
with session().log("Resolutions"):
|
||||
for fit, _ in fully_matched_segments:
|
||||
with session().log("Resolving {}".format(fit)): # REMIXES HAVE TO BE APPLIED BEFORE!!!
|
||||
resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
|
||||
if resolved_fit is not None:
|
||||
resolved_fits.append(resolved_fit)
|
||||
else:
|
||||
session().annotate("Not resolved")
|
||||
|
||||
if len(resolved_fits) == 0:
|
||||
return None
|
||||
|
@ -655,24 +655,9 @@ examples = [
|
||||
]
|
||||
|
||||
base_knowledge = {
|
||||
'icecream': {
|
||||
"groups": {'noun', 'object', 'comestible', 'sweet'},
|
||||
},
|
||||
'hot': {
|
||||
"groups": {'property', 'temperature'},
|
||||
},
|
||||
'summer': {
|
||||
"groups": {'epoch'},
|
||||
},
|
||||
'planet': {
|
||||
"groups": {'noun', 'group'},
|
||||
},
|
||||
'green': {
|
||||
"groups": {'noun', 'color', 'concept'},
|
||||
},
|
||||
'milk': {
|
||||
"groups": {'noun'},
|
||||
},
|
||||
'fly': {
|
||||
"groups": {'verb'},
|
||||
},
|
||||
@ -682,9 +667,6 @@ base_knowledge = {
|
||||
'electricity': {
|
||||
"groups": {'power'},
|
||||
},
|
||||
'french': {
|
||||
"groups": {'language'},
|
||||
}
|
||||
}
|
||||
|
||||
def main():
|
||||
|
Loading…
Reference in New Issue
Block a user