diff --git a/naive-nlu/tree_nlu/knowledge_base.py b/naive-nlu/tree_nlu/knowledge_base.py
index 3302ea9..28ad221 100644
--- a/naive-nlu/tree_nlu/knowledge_base.py
+++ b/naive-nlu/tree_nlu/knowledge_base.py
@@ -58,7 +58,8 @@ class KnowledgeBase(object):
 
         # Reduce values
         with session().log("reprocessing"):
-            self.layers.reprocess(self.examples)
+            res = self.layers.reprocess(self.examples)
+            self.trained = res
 
         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@@ -69,9 +70,7 @@ class KnowledgeBase(object):
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
         with session().log("Process: {}".format(row)):
-            tokens = self.tokenize(row)
-
-            fit = parsing.get_fit(self, tokens)
+            fit = self.layers.process(self, row)
             if fit is None:
                 return None
 
diff --git a/naive-nlu/tree_nlu/layered_model.py b/naive-nlu/tree_nlu/layered_model.py
index 9ecc242..0aee057 100644
--- a/naive-nlu/tree_nlu/layered_model.py
+++ b/naive-nlu/tree_nlu/layered_model.py
@@ -1,16 +1,18 @@
 from .layers import tokenization_layer
 from .layers import parsing_layer
+from .layers import parsing
+from .session.org_mode import global_session as session
 
 
-def make_yield_pipe(layers, knowledge_base, example):
+def make_yield_pipe(layers, knowledge_base, example, func):
     if len(layers) < 1:
         yield example
         return
 
-    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example)
+    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
     for input in input_generator:
-        print("-->", input)
-        for d in list(layers[-1].integrate(knowledge_base, input)):
+        session().annotate("[{}] --> {}".format(len(layers), input))
+        for d in list(func(layers[-1], input)):
             yield d
 
 
@@ -25,15 +27,10 @@ class BaseModel:
         ]
 
     def reprocess(self, examples):
-        for example in examples:
-            self._reprocess_single(example)
-
-    def _reprocess_single(self, example):
-        return
         pattern_examples = []
         for i, sample in enumerate(examples):
             other = examples[:i] + examples[i + 1:]
-            match = get_matching(sample, other)
+            match = parsing.get_matching(sample, other)
             if len(match) > 0:
                 sample = (match, sample[1],)
                 pattern_examples.append(sample)
@@ -41,7 +38,12 @@ class BaseModel:
         return pattern_examples
 
     def integrate(self, knowledge_base, example):
-        yield from make_yield_pipe(self.layers, knowledge_base, example)
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.integrate(knowledge_base, i))
+
+    def process(self, knowledge_base, example):
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.process(knowledge_base, i))
 
     def tokenize(self, row, return_one=True):
         return self.tokenization.to_tokens(row)
diff --git a/naive-nlu/tree_nlu/layers/parsing.py b/naive-nlu/tree_nlu/layers/parsing.py
index 7073a3a..69215d0 100644
--- a/naive-nlu/tree_nlu/layers/parsing.py
+++ b/naive-nlu/tree_nlu/layers/parsing.py
@@ -9,6 +9,7 @@ from typing import List, Dict
 from ..modifiable_property import ModifiableProperty
 from .. import parameters
 from ..atoms import Atom, a, is_atom
+from .. import knowledge_evaluation
 
 def make_template(knowledge_base, tokens, parsed):
     matcher = list(tokens)
@@ -97,8 +98,9 @@ def integrate_language(knowledge_base, example):
             result = build_remix_matrix(knowledge_base, tokens, atom, similar)
             if result is not None:
                 break
+        else:
+            raise Exception('Similar not found')
 
-    return
     remix, (start_bounds, end_bounds) = result
 
     after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
diff --git a/naive-nlu/tree_nlu/layers/parsing_layer.py b/naive-nlu/tree_nlu/layers/parsing_layer.py
index 13b865d..b631c75 100644
--- a/naive-nlu/tree_nlu/layers/parsing_layer.py
+++ b/naive-nlu/tree_nlu/layers/parsing_layer.py
@@ -8,4 +8,7 @@ class ParsingLayer:
         yield from parsing.integrate_language(knowledge_base, example)
 
     def train(self, knowledge_base, example):
-        assert False
\ No newline at end of file
+        assert False
+
+    def process(self, knowledge_base, input):
+        yield from parsing.get_fit(knowledge_base, input)
\ No newline at end of file
diff --git a/naive-nlu/tree_nlu/layers/tokenization_layer.py b/naive-nlu/tree_nlu/layers/tokenization_layer.py
index 1271818..28852fc 100644
--- a/naive-nlu/tree_nlu/layers/tokenization_layer.py
+++ b/naive-nlu/tree_nlu/layers/tokenization_layer.py
@@ -29,14 +29,20 @@ class TokenizationLayer:
 
     def integrate(self, knowledge_base, data):
         assert knowledge_base is self.knowledge_base
-        print(data)
         assert 'text' in data
-        with session().log("Tokenize: {}".format(data['text'])):
-            for tokens in tokenization.to_tokens(self, data['text']):
-                data_with_row = copy.copy(data)
-                data_with_row['tokens'] = tokens
-                print(data_with_row)
-                yield data_with_row
+        tokens = self.tokenize(data['text'])
+        data_with_row = copy.copy(data)
+        data_with_row['tokens'] = tokens
+        yield data_with_row
+
+        # with session().log("Tokenize: {}".format(data['text'])):
+        #     for tokens in tokenization.to_tokens(self, data['text']):
+        #         data_with_row = copy.copy(data)
+        #         data_with_row['tokens'] = tokens
+        #         yield data_with_row
+
+    def process(self, knowledge_base, row):
+        yield self.tokenize(row)
 
     def tokenize(self, row, return_one=True):
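
For reference, below is a minimal, runnable sketch of the generator pipeline that the new make_yield_pipe implements, where the func argument selects which per-layer method to call (integrate during training, process at query time). The toy layer classes and the sample row are illustrative placeholders, not code from this repository; only make_yield_pipe mirrors the patched function, minus the session logging.

def make_yield_pipe(layers, knowledge_base, example, func):
    # Base case: no layers left, pass the example through unchanged.
    if len(layers) < 1:
        yield example
        return

    # Recursive case: run every layer except the last, then feed each
    # intermediate result through the last layer via `func`.
    for intermediate in make_yield_pipe(layers[:-1], knowledge_base, example, func):
        yield from func(layers[-1], intermediate)


class ToyTokenizationLayer:  # hypothetical stand-in for TokenizationLayer
    def process(self, knowledge_base, row):
        yield row.split()


class ToyParsingLayer:  # hypothetical stand-in for ParsingLayer
    def process(self, knowledge_base, tokens):
        yield {'tokens': tokens, 'parsed': tuple(tokens)}


layers = [ToyTokenizationLayer(), ToyParsingLayer()]
pipe = make_yield_pipe(layers, None, "some input row",
                       lambda l, i: l.process(None, i))
print(list(pipe))
# -> [{'tokens': ['some', 'input', 'row'], 'parsed': ('some', 'input', 'row')}]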