Pass test using layer structure.

kenkeiras committed 2018-04-24 23:01:36 +02:00
commit 1ded981099, parent a444766c7c
5 changed files with 36 additions and 24 deletions

View File

@@ -58,7 +58,8 @@ class KnowledgeBase(object):
         # Reduce values
         with session().log("reprocessing"):
-            self.layers.reprocess(self.examples)
+            res = self.layers.reprocess(self.examples)
+            self.trained = res
 
         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@@ -69,9 +70,7 @@ class KnowledgeBase(object):
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
         with session().log("Process: {}".format(row)):
-            tokens = self.tokenize(row)
-            fit = parsing.get_fit(self, tokens)
+            fit = self.layers.process(self, row)
             if fit is None:
                 return None
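Note: the net effect in this file is that the knowledge base stops tokenizing and fitting inline and defers both the training and the inference pass to its layer stack. A minimal runnable sketch of the new control flow, with a stub standing in for the real layers (all names below are illustrative, not from the repo):

class StubLayers:
    """Stand-in for the real layered model."""
    def reprocess(self, examples):
        return list(examples)        # placeholder for pattern extraction

    def process(self, knowledge_base, row):
        return row.split() or None   # placeholder for parsing.get_fit

class KnowledgeBaseSketch:
    def __init__(self, layers, examples):
        self.layers = layers
        self.examples = examples
        self.trained = None

    def train(self):
        # reprocess() now returns a value, which gets stored
        self.trained = self.layers.reprocess(self.examples)

    def process(self, row):
        # tokenization and fitting now happen inside the layer stack
        return self.layers.process(self, row)

kb = KnowledgeBaseSketch(StubLayers(), ['a b c'])
kb.train()
print(kb.trained)          # ['a b c']
print(kb.process('x y'))   # ['x', 'y']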

View File

@@ -1,16 +1,18 @@
 from .layers import tokenization_layer
 from .layers import parsing_layer
+from .layers import parsing
+from .session.org_mode import global_session as session
 
 
-def make_yield_pipe(layers, knowledge_base, example):
+def make_yield_pipe(layers, knowledge_base, example, func):
     if len(layers) < 1:
         yield example
         return
 
-    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example)
+    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
     for input in input_generator:
-        print("-->", input)
-        for d in list(layers[-1].integrate(knowledge_base, input)):
+        session().annotate("[{}] --> {}".format(len(layers), input))
+        for d in list(func(layers[-1], input)):
             yield d
@@ -25,15 +27,10 @@ class BaseModel:
         ]
 
     def reprocess(self, examples):
-        for example in examples:
-            self._reprocess_single(example)
-
-    def _reprocess_single(self, example):
-        return
         pattern_examples = []
         for i, sample in enumerate(examples):
             other = examples[:i] + examples[i + 1:]
-            match = get_matching(sample, other)
+            match = parsing.get_matching(sample, other)
             if len(match) > 0:
                 sample = (match, sample[1],)
                 pattern_examples.append(sample)
@@ -41,7 +38,12 @@ class BaseModel:
         return pattern_examples
 
     def integrate(self, knowledge_base, example):
-        yield from make_yield_pipe(self.layers, knowledge_base, example)
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.integrate(knowledge_base, i))
+
+    def process(self, knowledge_base, example):
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.process(knowledge_base, i))
 
     def tokenize(self, row, return_one=True):
         return self.tokenization.to_tokens(row)
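Note: make_yield_pipe chains the layers recursively, feeding everything the earlier layers yield through the last layer, and the new func argument selects which per-layer method drives a pass. That is what lets integrate and process share one traversal instead of duplicating the recursion. A self-contained toy version (simplified: no knowledge base or session logging, and the layer classes are illustrative):

def make_yield_pipe(layers, example, func):
    # Base case: no layers left, pass the example through untouched
    if len(layers) < 1:
        yield example
        return
    # Everything the earlier layers yield is fed to the last layer
    for intermediate in make_yield_pipe(layers[:-1], example, func):
        yield from func(layers[-1], intermediate)

class Splitter:
    def integrate(self, data):
        yield from data.split()
    def process(self, data):
        yield data.split()

class Upper:
    def integrate(self, data):
        yield data.upper()
    def process(self, data):
        yield data

layers = [Splitter(), Upper()]
print(list(make_yield_pipe(layers, "a b", lambda l, i: l.integrate(i))))
# ['A', 'B']   -- integrate drives the pass
print(list(make_yield_pipe(layers, "a b", lambda l, i: l.process(i))))
# [['a', 'b']] -- same pipeline, process drives the pass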

View File

@@ -9,6 +9,7 @@ from typing import List, Dict
 from ..modifiable_property import ModifiableProperty
 from .. import parameters
 from ..atoms import Atom, a, is_atom
+from .. import knowledge_evaluation
 
 def make_template(knowledge_base, tokens, parsed):
     matcher = list(tokens)
@@ -97,8 +98,9 @@ def integrate_language(knowledge_base, example):
             result = build_remix_matrix(knowledge_base, tokens, atom, similar)
             if result is not None:
                 break
         else:
-            raise Exception('Similar not found')
+            return
 
         remix, (start_bounds, end_bounds) = result
         after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
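Note: the else in this hunk belongs to the enclosing for loop, not the if, so the change means "if no similar token ever triggered a break, give up quietly instead of raising". A quick reminder of Python's for/else semantics, since the change hinges on it (toy example, not repo code):

def find_first_even(numbers):
    for n in numbers:
        if n % 2 == 0:
            break
    else:
        # Runs only when the loop finished without hitting break,
        # i.e. nothing matched; this commit returns here instead of raising
        return None
    return n

print(find_first_even([1, 3, 4]))  # 4
print(find_first_even([1, 3, 5]))  # None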

View File

@@ -8,4 +8,7 @@ class ParsingLayer:
         yield from parsing.integrate_language(knowledge_base, example)
 
     def train(self, knowledge_base, example):
         assert False
+
+    def process(self, knowledge_base, input):
+        yield from parsing.get_fit(knowledge_base, input)

View File

@@ -29,14 +29,20 @@ class TokenizationLayer:
 
     def integrate(self, knowledge_base, data):
         assert knowledge_base is self.knowledge_base
-        print(data)
         assert 'text' in data
-        with session().log("Tokenize: {}".format(data['text'])):
-            for tokens in tokenization.to_tokens(self, data['text']):
-                data_with_row = copy.copy(data)
-                data_with_row['tokens'] = tokens
-                print(data_with_row)
-                yield data_with_row
+        tokens = self.tokenize(data['text'])
+        data_with_row = copy.copy(data)
+        data_with_row['tokens'] = tokens
+        yield data_with_row
+
+        # with session().log("Tokenize: {}".format(data['text'])):
+        #     for tokens in tokenization.to_tokens(self, data['text']):
+        #         data_with_row = copy.copy(data)
+        #         data_with_row['tokens'] = tokens
+        #         yield data_with_row
+
+    def process(self, knowledge_base, row):
+        yield self.tokenize(row)
 
     def tokenize(self, row, return_one=True):
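Note: with this hunk the tokenization layer exposes the same integrate/process pair as the parsing layer: integrate enriches a training example dict with its tokens, while process tokenizes a raw row for inference. A hedged sketch of that contract, with a whitespace tokenizer standing in for the real one:

import copy

class ToyTokenizationLayer:
    def integrate(self, data):
        # Training path: copy the example and attach its tokens
        assert 'text' in data
        data_with_row = copy.copy(data)
        data_with_row['tokens'] = self.tokenize(data['text'])
        yield data_with_row

    def process(self, row):
        # Inference path: a raw row just gets tokenized
        yield self.tokenize(row)

    def tokenize(self, row):
        return row.split()  # stand-in for tokenization.to_tokens

layer = ToyTokenizationLayer()
print(next(layer.integrate({'text': 'some input'})))
# {'text': 'some input', 'tokens': ['some', 'input']}
print(next(layer.process('some input')))
# ['some', 'input']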