"""Knowledge base that integrates examples and rows through the layered model."""

import copy

import logging
from .session.org_mode import global_session as session

from .atoms import Atom
from . import layered_model
from . import knowledge_evaluation
from .modifiable_property import is_modifiable_property
import random


def diff_knowledge(before, after):
    """Return the ``jsondiff`` difference between two knowledge snapshots.

    ``jsondiff`` is imported lazily so that it is only required when a diff
    is actually requested.
    """
    import jsondiff
    return jsondiff.diff(before, after)


class KnowledgeBase(object):
    """Holds the knowledge structure plus the examples used to build it.

    Attributes:
        knowledge: Shallow copy of the knowledge passed to the constructor.
        originals: Raw ``example['text']`` values seen by :meth:`train`.
        examples: ``(decomposition, inferred_tree)`` pairs collected while
            training.
        trained: Whatever ``self.layers.reprocess`` returned on the last
            :meth:`train` call.
        layers: ``layered_model.BaseModel`` bound to this knowledge base.
    """

    def __init__(self, knowledge=None, examples=None, trained=None):
        """Create a knowledge base from optional initial state.

        Args:
            knowledge: Initial knowledge; shallow-copied. Defaults to an
                empty dict.
            examples: Initial examples; shallow-copied. Defaults to an
                empty list.
            trained: Initial trained data; shallow-copied. Defaults to an
                empty list.
        """
        # ``None`` sentinels avoid mutable default arguments; every provided
        # container is still shallow-copied so the caller's object is not
        # rebound by later mutations of the top-level container.
        self.knowledge = {} if knowledge is None else copy.copy(knowledge)
        self.originals = []
        self.examples = [] if examples is None else copy.copy(examples)
        self.trained = [] if trained is None else copy.copy(trained)
        self.layers = layered_model.BaseModel(self)

    ## Parsing
    def train(self, examples):
        """Integrate every example into the knowledge base.

        Args:
            examples: Iterable of mappings. Each one is read through its
                ``'text'`` key and, optionally, its ``'parsed'`` key (only
                used up front when it is a tuple).

        Returns:
            A zero-argument callable that computes, on demand, the diff
            between deep-copied snapshots of ``self.knowledge`` taken before
            and after training.
        """
        knowledge_before = copy.deepcopy(self.knowledge)

        with session().log('Train'):
            # Parse everything
            for example in examples:
                # If there's parsed data, leverage it ASAP
                if 'parsed' in example and isinstance(example['parsed'], tuple):
                    with session().log('parsed information integration'):
                        result = knowledge_evaluation.integrate_information(
                            self.knowledge,
                            {
                                "parsed": example['parsed'],
                            })
                        self.act_upon(result)

                with session().log("language integration"):
                    # NOTE(review): the source formatting was collapsed, so
                    # the nesting below is reconstructed. It assumes the full
                    # integration runs once per interpretation yielded by
                    # ``integrate`` -- confirm against the upstream file.
                    for tokens, decomposition, inferred_tree in self.layers.integrate(self, example):
                        session().annotate("Tokens: {}".format(tokens))
                        session().annotate("Inferred tree: {}".format(inferred_tree))

                        with session().log("full information integration"):
                            # The tokens yielded above are replaced by a
                            # fresh single tokenization of the raw text.
                            tokens = self.layers.tokenization.tokenize(
                                example['text'], return_one=True)

                            result = knowledge_evaluation.integrate_information(
                                self.knowledge,
                                {
                                    "elements": tokens,
                                    "decomposition": decomposition,
                                    "parsed": inferred_tree,
                                })

                            session().annotate("Result: {}".format(self.get_value(result)))
                            self.act_upon(result)
                            session().annotate("Set: {}".format(self.get_value(result)))
                            self.examples.append((decomposition, inferred_tree))
                            self.originals.append(example['text'])

            # Reduce values
            with session().log("reprocessing"):
                self.trained = self.layers.reprocess(self.examples)

        knowledge_after = copy.deepcopy(self.knowledge)

        # The diff is deferred: it is only computed if the caller asks for it.
        return lambda: diff_knowledge(knowledge_before, knowledge_after)

    def process(self, row):
        """Interpret ``row`` and apply the first interpretation found.

        Args:
            row: Input handed to ``self.layers.process`` unchanged.

        Returns:
            ``None`` when the layered model yields no interpretation.
            Otherwise a tuple ``(result, inferred_tree, knowledge_diff_getter)``
            where ``knowledge_diff_getter`` is a zero-argument callable that
            diffs deep-copied knowledge snapshots taken before and after
            processing.
        """
        knowledge_before = copy.deepcopy(self.knowledge)

        with session().log("Process: {}".format(row)):
            # The generator is fully consumed even though only the first
            # interpretation is used.
            fit = list(self.layers.process(self, row))
            if not fit:
                return None

            tokens, inferred_tree = fit[0]
            result = knowledge_evaluation.integrate_information(
                self.knowledge,
                {
                    "elements": tokens,
                    "parsed": inferred_tree,
                })
            self.act_upon(result)
            session().annotate("Result: {}".format(result))

            knowledge_after = copy.deepcopy(self.knowledge)

            # The diff is deferred: it is only computed on request.
            return (
                result,
                inferred_tree,
                lambda: diff_knowledge(knowledge_before, knowledge_after),
            )

    def get_value(self, result):
        """Return ``result.getter()`` for modifiable properties, else ``result``."""
        if is_modifiable_property(result):
            return result.getter()
        return result

    def act_upon(self, result):
        """Call ``result.setter()`` for modifiable properties; warn otherwise."""
        if is_modifiable_property(result):
            result.setter()
        else:
            logging.warning("Cannot act upon: {}".format(result))