Add session files base implementation.
commit a0810dd0e8
.gitignore (vendored)
@@ -3,3 +3,4 @@
 *.ba?k
 *.pyc
 __pycache__
+treeNLU-*session*.org
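(The new pattern covers every session file this commit can generate: treeNLU-cli-session-*.org from the CLI, treeNLU-test-session.org from the test runner, and the treeNLU-session-*.org fallback in session/org_mode.py.)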
naive-nlu/tree_nlu/cli.py
(The captured view omitted file headers for the Python modules; paths from here on are inferred from each hunk's contents and relative imports.)

@@ -1,4 +1,9 @@
 import logging
+import datetime
+from .session.org_mode import (
+    global_session as session,
+    create_global_session,
+)
 from .knowledge_base import KnowledgeBase
 from .visualization import (
     show_knowledge,
@@ -15,8 +20,14 @@ from .modifiable_property import (
 bye_phrases = ['bye', 'exit']
 
 
+def gen_session_name():
+    now = datetime.datetime.utcnow()
+    return "treeNLU-cli-session-{}.org".format(
+        now.strftime("%y_%m_%d %H:%M:%S_%f"))
+
+
 def main():
+    create_global_session(gen_session_name())
     logging.getLogger().setLevel(logging.INFO)
     knowledge = gac_100.main()
     logging.getLogger().setLevel(logging.DEBUG)
@@ -38,6 +49,7 @@ def main():
             show_samples(knowledge)
             continue
 
+        with session().log(data):
             ret = knowledge.process(data)
             if ret:
                 result, _, _ = ret
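For reference, a sketch of what gen_session_name() evaluates to (the timestamp is illustrative). Note that the strftime pattern places a space and colons inside the file name, which some filesystems (e.g. NTFS) do not accept:

    import datetime

    now = datetime.datetime(2017, 9, 24, 18, 35, 2, 123456)  # illustrative instant
    print("treeNLU-cli-session-{}.org".format(
        now.strftime("%y_%m_%d %H:%M:%S_%f")))
    # -> treeNLU-cli-session-17_09_24 18:35:02_123456.org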
naive-nlu/tree_nlu/knowledge_base.py
@@ -1,7 +1,8 @@
 import copy
 
 import logging
 
+from .session.org_mode import global_session as session
 from . import parsing
 from . import knowledge_evaluation
 from .modifiable_property import is_modifiable_property
@@ -21,32 +22,36 @@ class KnowledgeBase(object):
 
     def train(self, examples):
         knowledge_before = copy.deepcopy(self.knowledge)
+        with session().log('Train'):
             # Parse everything
             for example in examples:
                 # If there's parsed data, leverage it ASAP
                 if 'parsed' in example:
+                    with session().log('parsed information integration'):
                         result = knowledge_evaluation.integrate_information(self.knowledge, {
                             "parsed": example['parsed'],
                         })
                         self.act_upon(result)
 
-            logging.debug("\x1b[7;32m> {} \x1b[0m".format(example))
+                with session().log("language integration"):
                     tokens, decomposition, inferred_tree = parsing.integrate_language(self, example)
-            logging.debug(tokens)
+                    session().annotate(tokens)
 
+                with session().log("full information integration"):
                     result = knowledge_evaluation.integrate_information(self.knowledge, {
                         "elements": tokens,
                         "decomposition": decomposition,
                         "parsed": inferred_tree,
                     })
 
-            logging.debug("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result)))
+                    session().annotate("Result: {}".format(self.get_value(result)))
                     self.act_upon(result)
-            logging.debug("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result)))
+                    session().annotate("Set: {}".format(self.get_value(result)))
                     self.examples.append((decomposition, inferred_tree))
                     self.originals.append(example['text'])
 
             # Reduce values
+            with session().log("reprocessing"):
                 self.trained = parsing.reprocess_language_knowledge(self, self.examples)
 
         knowledge_after = copy.deepcopy(self.knowledge)
@@ -58,7 +63,7 @@ class KnowledgeBase(object):
     def process(self, row):
         row = row.lower()
         knowledge_before = copy.deepcopy(self.knowledge)
-        logging.debug("\x1b[7;32m> {} \x1b[0m".format(row))
+        with session().log("Process: {}".format(row)):
             tokens = parsing.to_tokens(row)
             fit = parsing.get_fit(self, tokens)
             if fit is None:
@@ -71,6 +76,7 @@ class KnowledgeBase(object):
                 "parsed": inferred_tree,
             })
             self.act_upon(result)
+            session().annotate("Result: {}".format(result))
 
         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
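The pattern introduced here recurs in every module below: each processing phase is wrapped in `with session().log(...)`, which writes an org-mode heading and deepens the outline for the duration of the block, while `session().annotate(...)` replaces the old `logging.debug` calls and attaches detail lines under the current heading. A minimal sketch of the org output one train() call would produce (headings taken from the code above, body text illustrative):

    * Train
    ** language integration
          ['earth', 'is', 'a', 'planet']
    ** full information integration
          Result: True
          Set: True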
naive-nlu/tree_nlu/knowledge_evaluation.py
@@ -1,4 +1,4 @@
-import logging
+from .session.org_mode import global_session as session
 
 from .modifiable_property import (
     ModifiableProperty,
@@ -11,7 +11,7 @@ def resolve(knowledge_base, elements, value):
     if isinstance(value, int):
         return elements[value]
     elif isinstance(value, tuple) or isinstance(value, list):
-        logging.debug("V: {} {}".format(value, elements))
+        session().annotate("V: {} {}".format(value, elements))
         return integrate_information(knowledge_base, {
             "elements": elements,
             "parsed": value,
@@ -103,16 +103,16 @@ def exists_property_with_value(knowledge_base, elements, subj, value):
 
 
 def modifiable_element_for_existance_in_set(container, set_name, element):
-    logging.debug("-----({} {} {})".format(container, set_name, element))
+    session().annotate("-----({} {} {})".format(container, set_name, element))
 
     def getter():
         nonlocal container, set_name, element
-        logging.debug(" get({} {} {})".format(container, set_name, element))
+        session().annotate(" get({} {} {})".format(container, set_name, element))
        return (set_name in container) and (element in container[set_name])
 
     def setter():
         nonlocal container, set_name, element
-        logging.debug(" add({} {} {})".format(container, set_name, element))
+        session().annotate(" add({} {} {})".format(container, set_name, element))
         return container[set_name].add(element)
 
     return ModifiableProperty(
@@ -219,7 +219,7 @@ def perform_verb_over_object(knowledge_base, elements, subj, verb, obj):
     subj = resolve(knowledge_base, elements, subj)
     verb = resolve(knowledge_base, elements, verb)
     obj = resolve(knowledge_base, elements, obj)
-    logging.debug("({} {} {})".format(verb, subj, obj))
+    session().annotate("({} {} {})".format(verb, subj, obj))
 
     if subj not in knowledge_base:
         knowledge_base[subj] = {'groups': set()}
@@ -263,10 +263,10 @@ def integrate_information(knowledge_base, example):
     args = ast[1:]
     elements = example.get('elements', None)
 
-    logging.debug("Integrating:")
-    logging.debug("AST: {}".format(ast))
-    logging.debug("ARG: {}".format(elements))
-    logging.debug("------------")
+    session().annotate("Integrating:")
+    session().annotate("AST: {}".format(ast))
+    session().annotate("ARG: {}".format(elements))
+    session().annotate("------------")
 
     return tagged_with_ast(
         ast, elements,
naive-nlu/tree_nlu/parsing.py
@@ -3,7 +3,7 @@
 from . import knowledge_evaluation
 
 from . import depth_meter
-import logging
+from .session.org_mode import global_session as session
 import re
 import copy
 
@@ -20,9 +20,9 @@ def to_tokens(text):
 def make_template(knowledge_base, tokens, parsed):
     matcher = list(tokens)
     template = list(parsed)
-    logging.debug(" -- MK TEMPLATE --")
-    logging.debug("MATCHR: {}".format(matcher))
-    logging.debug("TEMPLT: {}".format(template))
+    session().annotate(" -- MK TEMPLATE --")
+    session().annotate("MATCHR: {}".format(matcher))
+    session().annotate("TEMPLT: {}".format(template))
     for i in range(len(matcher)):
         word = matcher[i]
         if word in template:
@@ -59,11 +59,11 @@ def get_lower_levels(parsed):
 
 # TODO: probably optimize this, it creates lots of unnecessary tuples
 def replace_position(tree, position, new_element):
-    logging.debug("REPLACE POSITIONS:")
-    logging.debug(" TREE : {}".format(tree))
-    logging.debug("POSITION: {}".format(position))
-    logging.debug("NEW ELEM: {}".format(new_element))
-    logging.debug("------------------")
+    session().annotate("REPLACE POSITIONS:")
+    session().annotate(" TREE : {}".format(tree))
+    session().annotate("POSITION: {}".format(position))
+    session().annotate("NEW ELEM: {}".format(new_element))
+    session().annotate("------------------")
 
     def aux(current_tree, remaining_route):
         if len(remaining_route) == 0:
@@ -78,7 +78,7 @@ def replace_position(tree, position, new_element):
         )
 
     result = aux(tree, position)
-    logging.debug("-RESULT: {}".format(result))
+    session().annotate("-RESULT: {}".format(result))
     return result
 
 
@@ -90,37 +90,37 @@ def integrate_language(knowledge_base, example):
     tokens = to_tokens(text)
 
     while True:
-        logging.debug("P: {}".format(resolved_parsed))
+        session().annotate("P: {}".format(resolved_parsed))
         lower_levels = get_lower_levels(resolved_parsed)
-        logging.debug("Lower: {}".format(lower_levels))
+        session().annotate("Lower: {}".format(lower_levels))
         if len(lower_levels) == 0:
             break
 
         for position, atom in lower_levels:
-            logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom))
+            with session().log("Atom {}".format(atom)):
                 similar = get_similar_tree(knowledge_base, atom, tokens)
                 remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
 
                 after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
-            logging.debug("--FIND MIX--")
-            logging.debug("-MIX- | {}".format(remix))
-            logging.debug("-FRM- | {}".format(tokens))
-            logging.debug("-AFT- | {}".format(after_remix))
+                session().annotate("--FIND MIX--")
+                session().annotate("-MIX- | {}".format(remix))
+                session().annotate("-FRM- | {}".format(tokens))
+                session().annotate("-AFT- | {}".format(after_remix))
 
-            logging.debug("--- TEMPLATE ---")
+                session().annotate("--- TEMPLATE ---")
 
                 _, matcher, result = make_template(knowledge_base, after_remix, atom)
-            logging.debug("Tx: {}".format(after_remix))
-            logging.debug("Mx: {}".format(matcher))
-            logging.debug("Rx: {}".format(result))
-            logging.debug("Sx: {}".format(start_bounds))
-            logging.debug("Ex: {}".format(end_bounds))
+                session().annotate("Tx: {}".format(after_remix))
+                session().annotate("Mx: {}".format(matcher))
+                session().annotate("Rx: {}".format(result))
+                session().annotate("Sx: {}".format(start_bounds))
+                session().annotate("Ex: {}".format(end_bounds))
 
 
                 assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
-            logging.debug( " +-> {}".format(after_remix))
+                session().annotate( " +-> {}".format(after_remix))
                 subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
-            logging.debug(r" \-> <{}>".format(subquery_type))
+                session().annotate(r" \-> <{}>".format(subquery_type))
 
                 # Clean remaining tokens
                 new_tokens = list(tokens)
@@ -133,16 +133,16 @@ def integrate_language(knowledge_base, example):
                 tokens = new_tokens
 
                 resolved_parsed = replace_position(resolved_parsed, position, offset)
-            logging.debug("RP: {}".format(resolved_parsed))
-            logging.debug("AT: {}".format(atom))
-            logging.debug("#########")
+                session().annotate("RP: {}".format(resolved_parsed))
+                session().annotate("AT: {}".format(atom))
+                session().annotate("#########")
 
 
     tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
-    logging.debug("T: {}".format(tokens))
-    logging.debug("M: {}".format(matcher))
-    logging.debug("R: {}".format(result))
-    logging.debug("---")
+    session().annotate("T: {}".format(tokens))
+    session().annotate("M: {}".format(matcher))
+    session().annotate("R: {}".format(result))
+    session().annotate("---")
    return tokens, matcher, result
 
 
@@ -180,8 +180,8 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher):
 
     matrix = []
     for element in matcher:
-        logging.debug("- {}".format(element))
-        logging.debug("+ {}".format(similar_matcher))
+        session().annotate("- {}".format(element))
+        session().annotate("+ {}".format(similar_matcher))
         if element in similar_matcher or isinstance(element, dict):
             if isinstance(element, dict):
                 indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
@@ -298,12 +298,10 @@ def get_similar_tree(knowledge_base, atom, tokens):
         return None
 
     for i, possibility in enumerate(sorted_possibilities):
-        logging.debug('---- POSSIBILITY #{} ----'.format(i))
         similar_matcher, similar_result, similar_result_resolved, _, _ = possibility
-        logging.debug('AST: {}'.format(similar_result))
-        logging.debug('Based on: {}'.format(similar_matcher))
-        logging.debug('Results on: {}'.format(similar_result_resolved))
-        logging.debug('---------------------')
+        with session().log("Like {}".format(similar_matcher)):
+            session().annotate('Results on: {}'.format(similar_result_resolved))
+            session().annotate('AST: {}'.format(similar_result))
 
     return sorted_possibilities[0]
 
@@ -382,9 +380,9 @@ def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
 
         if result is not None:
             results.append(result)
-            logging.debug("XXX {}".format(result))
+            session().annotate("XXX {}".format(result))
 
-    logging.debug(' - ' + '\n - '.join(map(str, results)))
+    session().annotate(' - ' + '\n - '.join(map(str, results)))
     if len(results) > 0:
         return results[0]
 
@@ -419,7 +417,7 @@ def resolve_fit(knowledge, fit, remaining_recursions):
             return None
 
         minitokens, miniast = minifit
-        logging.debug(" AST | {}".format(miniast))
+        session().annotate(" AST | {}".format(miniast))
         subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
         fitted.append(subproperty)
 
@@ -429,17 +427,17 @@ def resolve_fit(knowledge, fit, remaining_recursions):
 def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
     segment_possibilities = [([], tokens)]  # Matched tokens, remaining tokens
     indent = ' ' * (parameters.MAX_RECURSIONS - remaining_recursions)
-    logging.debug(indent + 'T> {}'.format(tokens))
-    logging.debug(indent + 'M> {}'.format(matcher))
+    session().annotate(indent + 'T> {}'.format(tokens))
+    session().annotate(indent + 'M> {}'.format(matcher))
     for minisegment in matcher:
         possibilities_after_round = []
-        logging.debug(indent + "MS {}".format(minisegment))
+        session().annotate(indent + "MS {}".format(minisegment))
         for matched_tokens, remaining_tokens in segment_possibilities:
             if len(remaining_tokens) < 1:
                 continue
 
-            logging.debug(indent + "RT {}".format(remaining_tokens[0]))
-            logging.debug(indent + "DEF {}".format(is_definite_minisegment(minisegment)))
+            session().annotate(indent + "RT {}".format(remaining_tokens[0]))
+            session().annotate(indent + "DEF {}".format(is_definite_minisegment(minisegment)))
             if is_definite_minisegment(minisegment):
                 # What if not match -----<
                 if match_token(knowledge, remaining_tokens[0], minisegment):
@@ -455,10 +453,10 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
                         matched_tokens + [(minisegment, remaining_tokens[:i])],
                         remaining_tokens[i:]
                     ))
-                logging.debug(indent + "## PA {}".format(possibilities_after_round))
+                session().annotate(indent + "## PA {}".format(possibilities_after_round))
         else:
             segment_possibilities = possibilities_after_round
-            logging.debug(">>>> {}".format(len(segment_possibilities)))
+            session().annotate(">>>> {}".format(len(segment_possibilities)))
 
     fully_matched_segments = [(matched, remaining)
                               for (matched, remaining)
@@ -467,11 +465,11 @@ def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
 
     resolved_fits = []
     for fit, _ in fully_matched_segments:
-        logging.debug(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+        session().annotate(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
 
-    logging.debug(indent + '*' * 20)
+    session().annotate(indent + '*' * 20)
     for fit, _ in fully_matched_segments:
-        logging.debug(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+        session().annotate(indent + "::: {}".format(fit))  # REMIXES HAVE TO BE APPLIED BEFORE!!!
         resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
         if resolved_fit is not None:
             resolved_fits.append(resolved_fit)
naive-nlu/tree_nlu/session/org_mode.py (new file, 79 lines)
@@ -0,0 +1,79 @@
+import logging
+import datetime
+
+SESSION = None
+
+def __gen_session_name__():
+    now = datetime.datetime.utcnow()
+    return "treeNLU-session-{}.org".format(
+        now.strftime("%y_%m_%d %H:%M:%S_%f"))
+
+
+def create_global_session(fname):
+    global SESSION
+    SESSION = OrgModeSession(fname)
+
+
+def global_session():
+    if SESSION is None:
+        session_name = __gen_session_name__()
+        logging.warn("Session not created, saved on {}".format(session_name))
+        create_global_session(session_name)
+
+    assert(SESSION is not None)
+    return SESSION
+
+
+def get_header():
+    now = datetime.datetime.utcnow()
+    return ("# Ran on {}\n".format(
+        now.strftime("%y/%m/%d %H:%M:%S.%f")))
+
+class LevelContext:
+    def __init__(self, increaser, decreaser):
+        self.increaser = increaser
+        self.decreaser = decreaser
+
+    def __enter__(self):
+        self.increaser()
+
+    def __exit__(self, _type, _value, _traceback):
+        self.decreaser()
+
+
+class OrgModeSession:
+    def __init__(self, fname):
+        self.f = open(fname, 'wt')
+        self.level = 0
+        self.dirty = False
+
+        self.f.write(get_header())
+
+    def annotate(self, annotation):
+        if self.dirty:
+            self.f.write("{indentation} {data}\n".format(
+                indentation='*' * (self.level + 1),
+                data="---"))
+            self.dirty = False
+
+        self.f.write("{indentation} {data}\n".format(
+            indentation=' ' * (self.level + 2 + 1),
+            data=annotation))
+
+    def log(self, string):
+        self.f.write("{indentation} {data}\n".format(
+            indentation='*' * (self.level + 1),
+            data=string))
+        self.dirty = False
+
+        return LevelContext(self.inc_level, self.dec_level)
+
+    def inc_level(self):
+        self.level += 1
+
+    def dec_level(self):
+        self.level -= 1
+        self.dirty = True
+
+    def close(self):
+        self.f.close()
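A minimal usage sketch of the API this new module exposes (file name hypothetical; assumes the package is importable as tree_nlu). log() writes an org-mode heading at the current depth and returns a LevelContext, so nested with-blocks produce nested headings; annotate() writes indented body text under the most recent heading, and the dirty flag makes an annotation that arrives right after a block has closed emit a "---" heading first, so it cannot visually attach to the wrong section. (As an aside, logging.warn is a deprecated alias of logging.warning.)

    from tree_nlu.session.org_mode import create_global_session, global_session

    create_global_session('demo-session.org')  # hypothetical file name
    with global_session().log('Train'):
        with global_session().log('language integration'):
            global_session().annotate('tokens: ...')
    global_session().close()

    # demo-session.org then contains something like:
    #   # Ran on 17/09/24 18:35:02.123456
    #   * Train
    #   ** language integration
    #         tokens: ...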
naive-nlu/tree_nlu/test.py
@@ -1,5 +1,7 @@
 import traceback
 import logging
+import datetime
+from .session import org_mode
 from .tests import basic
 from .tests import gac_100
 from .tests import gac_extension
@@ -12,7 +14,13 @@ tests = (
     ("gac+", gac_extension),
 )
 
+
+def gen_session_name():
+    return "treeNLU-test-session.org"
+
+
 def main():
+    org_mode.create_global_session(gen_session_name())
     failed = False
     for test_name, test_module in tests:
         try:
@@ -29,6 +37,7 @@ def main():
             failed = True
             traceback.print_exc()
             raise
+    org_mode.global_session().close()
 
     if failed:
         exit(1)
naive-nlu/tree_nlu/tests/basic.py
@@ -1,4 +1,4 @@
-import logging
+from ..session.org_mode import global_session as session
 import json
 
 from ..knowledge_base import KnowledgeBase
@@ -110,13 +110,13 @@ base_knowledge = {
 }
 
 def test_assumption(expectedResponse, knowledge, query):
-    logging.debug("Query: {}".format(query['text']))
-    logging.debug("Expected: {}".format(expectedResponse))
+    with session().log(query['text']):
+        session().annotate("Expected: {}".format(expectedResponse))
 
         result, abstract_tree, diff = knowledge.process(query['text'])
         end_result = result.getter() if is_modifiable_property(result) else result
 
-    logging.debug("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))
+        session().annotate("Result: {}".format(end_result))
         if end_result != expectedResponse:
             raise AssertionError('{} is not {}'.format(end_result, expectedResponse))
 
@@ -125,11 +125,13 @@ def main():
         knowledge=base_knowledge,
     )
 
-    differences = knowledge.train(examples)
+    for example in examples:
+        with session().log(example['text']):
+            differences = knowledge.train([example])
 
-    logging.debug("----")
-    logging.debug(differences())
-    logging.debug("----")
+            session().annotate("----")
+            session().annotate(differences())
+            session().annotate("----")
 
     test_assumption(True, knowledge, {'text': 'earth is a planet'})
     test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
@@ -137,12 +139,12 @@ def main():
         row = test['text']
         result, inferred_tree, differences = knowledge.process(row)
 
-        logging.debug("result:", result)
-        logging.debug(differences())
-        logging.debug("---")
-        logging.debug('-----')
-        logging.debug(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
-        logging.debug('-----')
+        session().annotate("result: {}".format(result))
+        session().annotate(differences())
+        session().annotate("---")
+        session().annotate('-----')
+        session().annotate(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
+        session().annotate('-----')
 
     queryTrue = {
         "text": "is io a moon?",
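With the training loop rewritten this way, each example gets its own top-level section in treeNLU-test-session.org, and each test_assumption() call adds another. The head of a session file from this test might look like (texts and diff content illustrative):

    # Ran on 17/09/24 18:35:02.123456
    * earth is a planet
         ----
         ...knowledge-base diff...
         ----
    * earth is a planet
         Expected: True
         Result: True

Note in passing that the old logging.debug("result:", result) call was itself a bug (logging takes a format string, not print-style arguments); the switch to session().annotate("result: {}".format(result)) fixes it.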
naive-nlu/tree_nlu/tests/gac_100.py
@@ -1,4 +1,4 @@
-import logging
+from ..session.org_mode import global_session as session
 from ..knowledge_base import KnowledgeBase
 from ..utils.visuals import show_progbar
 from ..visualization import show_knowledge
@@ -178,12 +178,12 @@ examples = [
     #  "parsed": (),
     #  "answer": None,
     # },
+    # ('text_example',
     # {
-    #  "text": "Is water a liquid?",
-    #  "affirmation": "Is water a liquid?",
-    #  "parsed": (),
-    #  "answer": None,
-    # },
+    #  "question": "is water a liquid?",
+    #  "affirmation": "water is a liquid",
+    #  "answer": True,
+    #  }),
     # {
     #  "text": "Is Bugs Bunny a cartoon character?",
     #  "affirmation": "Is Bugs Bunny a cartoon character?",
@@ -696,19 +696,20 @@ def main():
 
     for i, (example_type, data) in enumerate(examples):
         if example_type == 'full_example':
-            logging.info(data['affirmation'])
             affirmation = {
                 'text': data['affirmation'],
                 'parsed': data['parsed'][1],
             }
             question = data
 
+            with session().log(data['affirmation']):
                 show_progbar(i, total, data['affirmation'])
                 differences = knowledge.train([affirmation])
 
+            with session().log(data['text']):
                 show_progbar(i, total, data['text'])
                 differences = knowledge.train([question])
-            logging.debug(differences())
+                session().annotate(differences())
 
                 result, _, _ = knowledge.process(data['text'])
 
@@ -720,16 +721,16 @@ def main():
                 raise AssertionError('{} is not {}'.format(result, data['answer']))
 
         elif example_type == 'text_example':
-            logging.info(data['affirmation'])
+            with session().log(data['affirmation']):
 
                 show_progbar(i, total, data['affirmation'])
                 affirmation = data['affirmation']
-            logging.debug("Processing affirmation: {}".format(affirmation))
+                session().annotate("Processing affirmation: {}".format(affirmation))
                 _, _, _ = knowledge.process(affirmation)
 
+            with session().log(data['question']):
                 show_progbar(i, total, data['question'])
                 question = data['question']
-            logging.debug("Processing question : {}".format(question))
+                session().annotate("Processing question : {}".format(question))
                 result, _, _ = knowledge.process(question)
 
             if result != data['answer']:
naive-nlu/tree_nlu/tests/gac_extension.py
@@ -1,14 +1,18 @@
 from ..knowledge_base import KnowledgeBase
+from ..session.org_mode import global_session as session
 
 from . import gac_100
 
 
 def ask_then_learn_test(knowledge: KnowledgeBase):
+    with session().log("is icecream blue?"):
         ret, _, _ = knowledge.process("is icecream blue?")
         assert(ret is False)
 
+    with session().log("icecream is blue"):
         ret, _, _ = knowledge.process("icecream is blue")
 
+    with session().log("is icecream blue?"):
         ret, _, _ = knowledge.process("is icecream blue?")
         assert(ret is True)
 