import copy
import logging

from .session.org_mode import global_session as session
from . import parsing
from . import knowledge_evaluation
from .modifiable_property import is_modifiable_property


def diff_knowledge(before, after):
    """Return the ``jsondiff`` difference between two knowledge snapshots."""
    # Imported inside the function so jsondiff is only loaded when a diff is
    # actually requested.
    import jsondiff

    return jsondiff.diff(before, after)


def _deferred_diff(before, after):
    """Build a zero-argument callable that diffs two snapshots on demand."""
    def knowledge_diff_getter():
        return diff_knowledge(before, after)

    return knowledge_diff_getter


class KnowledgeBase(object):
    """Knowledge store together with the language examples used to build it.

    Attributes:
        knowledge: shallow copy of the knowledge object given at construction.
        originals: the ``'text'`` entry of every example passed to ``train``.
        examples: ``(decomposition, inferred_tree)`` pairs gathered by ``train``.
        trained: value returned by
            ``parsing.reprocess_language_knowledge`` on the last ``train`` call
            (or the value supplied at construction).
    """

    def __init__(self, knowledge, examples=None, trained=None):
        """Create a knowledge base.

        Args:
            knowledge: initial knowledge object; it is shallow-copied.
            examples: optional initial examples; shallow-copied. Defaults to
                an empty list.
            trained: optional initial trained data; shallow-copied. Defaults
                to an empty list.
        """
        # ``None`` sentinels replace the former mutable ``[]`` defaults so no
        # list object is shared through the function signature.
        self.knowledge = copy.copy(knowledge)
        self.originals = []
        self.examples = [] if examples is None else copy.copy(examples)
        self.trained = [] if trained is None else copy.copy(trained)

    def train(self, examples):
        """Integrate a batch of examples into the knowledge base.

        Each example is indexed with ``'text'`` and may contain ``'parsed'``.

        Args:
            examples: iterable of example mappings.

        Returns:
            A zero-argument callable that computes the diff between the
            knowledge as it was before and after this call.
        """
        knowledge_before = copy.deepcopy(self.knowledge)
        with session().log('Train'):
            # Parse everything
            for example in examples:
                # If there's parsed data, leverage it ASAP
                if 'parsed' in example:
                    with session().log('parsed information integration'):
                        result = knowledge_evaluation.integrate_information(
                            self.knowledge,
                            {
                                "parsed": example['parsed'],
                            },
                        )
                        self.act_upon(result)

                with session().log("language integration"):
                    tokens, decomposition, inferred_tree = (
                        parsing.integrate_language(self, example)
                    )
                    session().annotate(tokens)

                with session().log("full information integration"):
                    result = knowledge_evaluation.integrate_information(
                        self.knowledge,
                        {
                            "elements": tokens,
                            "decomposition": decomposition,
                            "parsed": inferred_tree,
                        },
                    )

                    # The value is annotated both before and after acting on
                    # the result so the session log shows the change.
                    session().annotate(
                        "Result: {}".format(self.get_value(result)))
                    self.act_upon(result)
                    session().annotate(
                        "Set: {}".format(self.get_value(result)))
                    self.examples.append((decomposition, inferred_tree))
                    self.originals.append(example['text'])

            # Reduce values
            with session().log("reprocessing"):
                self.trained = parsing.reprocess_language_knowledge(
                    self, self.examples)

            knowledge_after = copy.deepcopy(self.knowledge)
            # The diff is deferred: it is only computed if the caller invokes
            # the returned callable.
            return _deferred_diff(knowledge_before, knowledge_after)

    def process(self, row):
        """Process a single line of text against the knowledge base.

        The text is lower-cased, tokenized and matched with
        ``parsing.get_fit``; the fitted tree is then integrated into the
        knowledge and the result acted upon.

        Args:
            row: input text.

        Returns:
            ``None`` when no fit is found; otherwise a tuple
            ``(result, inferred_tree, knowledge_diff_getter)`` where the last
            element is a zero-argument callable computing the knowledge diff
            caused by this call.
        """
        row = row.lower()
        knowledge_before = copy.deepcopy(self.knowledge)
        with session().log("Process: {}".format(row)):
            tokens = parsing.to_tokens(row)
            fit = parsing.get_fit(self, tokens)
            if fit is None:
                return None

            tokens, inferred_tree = fit
            result = knowledge_evaluation.integrate_information(
                self.knowledge,
                {
                    "elements": tokens,
                    "parsed": inferred_tree,
                },
            )
            self.act_upon(result)
            session().annotate("Result: {}".format(result))

            knowledge_after = copy.deepcopy(self.knowledge)
            knowledge_diff_getter = _deferred_diff(
                knowledge_before, knowledge_after)

            return result, inferred_tree, knowledge_diff_getter

    def get_value(self, result):
        """Return ``result.getter()`` for modifiable properties, else ``result``."""
        if is_modifiable_property(result):
            return result.getter()
        return result

    def act_upon(self, result):
        """Call ``result.setter()`` for modifiable properties; warn otherwise."""
        if is_modifiable_property(result):
            result.setter()
        else:
            logging.warning("Cannot act upon: {}".format(result))