Pass test using layer structure.

This commit is contained in:
kenkeiras 2018-04-24 23:01:36 +02:00
parent a444766c7c
commit 1ded981099
5 changed files with 36 additions and 24 deletions

View File

@ -58,7 +58,8 @@ class KnowledgeBase(object):
# Reduce values
with session().log("reprocessing"):
self.layers.reprocess(self.examples)
res = self.layers.reprocess(self.examples)
self.trained = res
knowledge_after = copy.deepcopy(self.knowledge)
knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@ -69,9 +70,7 @@ class KnowledgeBase(object):
def process(self, row):
knowledge_before = copy.deepcopy(self.knowledge)
with session().log("Process: {}".format(row)):
tokens = self.tokenize(row)
fit = parsing.get_fit(self, tokens)
fit = self.layers.process(self, row)
if fit is None:
return None

View File

@ -1,16 +1,18 @@
from .layers import tokenization_layer
from .layers import parsing_layer
from .layers import parsing
from .session.org_mode import global_session as session
def make_yield_pipe(layers, knowledge_base, example):
def make_yield_pipe(layers, knowledge_base, example, func):
if len(layers) < 1:
yield example
return
input_generator = make_yield_pipe(layers[:-1], knowledge_base, example)
input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
for input in input_generator:
print("-->", input)
for d in list(layers[-1].integrate(knowledge_base, input)):
session().annotate("[{}] --> {}".format(len(layers), input))
for d in list(func(layers[-1], input)):
yield d
@ -25,15 +27,10 @@ class BaseModel:
]
def reprocess(self, examples):
for example in examples:
self._reprocess_single(example)
def _reprocess_single(self, example):
return
pattern_examples = []
for i, sample in enumerate(examples):
other = examples[:i] + examples[i + 1:]
match = get_matching(sample, other)
match = parsing.get_matching(sample, other)
if len(match) > 0:
sample = (match, sample[1],)
pattern_examples.append(sample)
@ -41,7 +38,12 @@ class BaseModel:
return pattern_examples
def integrate(self, knowledge_base, example):
yield from make_yield_pipe(self.layers, knowledge_base, example)
yield from make_yield_pipe(self.layers, knowledge_base,
example, lambda l, i: l.integrate(knowledge_base, i))
def process(self, knowledge_base, example):
yield from make_yield_pipe(self.layers, knowledge_base,
example, lambda l, i: l.process(knowledge_base, i))
def tokenize(self, row, return_one=True):
return self.tokenization.to_tokens(row)

View File

@ -9,6 +9,7 @@ from typing import List, Dict
from ..modifiable_property import ModifiableProperty
from .. import parameters
from ..atoms import Atom, a, is_atom
from .. import knowledge_evaluation
def make_template(knowledge_base, tokens, parsed):
matcher = list(tokens)
@ -97,8 +98,9 @@ def integrate_language(knowledge_base, example):
result = build_remix_matrix(knowledge_base, tokens, atom, similar)
if result is not None:
break
else:
raise Exception('Similar not found')
return
remix, (start_bounds, end_bounds) = result
after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)

View File

@ -8,4 +8,7 @@ class ParsingLayer:
yield from parsing.integrate_language(knowledge_base, example)
def train(self, knowledge_base, example):
assert False
assert False
def process(self, knowledge_base, input):
yield from parsing.get_fit(knowledge_base, input)

View File

@ -29,14 +29,20 @@ class TokenizationLayer:
def integrate(self, knowledge_base, data):
assert knowledge_base is self.knowledge_base
print(data)
assert 'text' in data
with session().log("Tokenize: {}".format(data['text'])):
for tokens in tokenization.to_tokens(self, data['text']):
data_with_row = copy.copy(data)
data_with_row['tokens'] = tokens
print(data_with_row)
yield data_with_row
tokens = self.tokenize(data['text'])
data_with_row = copy.copy(data)
data_with_row['tokens'] = tokens
yield data_with_row
# with session().log("Tokenize: {}".format(data['text'])):
# for tokens in tokenization.to_tokens(self, data['text']):
# data_with_row = copy.copy(data)
# data_with_row['tokens'] = tokens
# yield data_with_row
def process(self, knowledge_base, row):
yield self.tokenize(row)
def tokenize(self, row, return_one=True):