Pass test using layer structure.
This commit is contained in:
parent a444766c7c
commit 1ded981099
@@ -58,7 +58,8 @@ class KnowledgeBase(object):
         # Reduce values
         with session().log("reprocessing"):
-            self.layers.reprocess(self.examples)
+            res = self.layers.reprocess(self.examples)
+            self.trained = res

         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@@ -69,9 +70,7 @@ class KnowledgeBase(object):
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
         with session().log("Process: {}".format(row)):
-            tokens = self.tokenize(row)
-
-            fit = parsing.get_fit(self, tokens)
+            fit = self.layers.process(self, row)
            if fit is None:
                return None
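Taken together, these two hunks reroute KnowledgeBase through the layer pipeline: training stores the pipeline's result on self.trained, and process asks the layers for a fit instead of tokenizing and parsing inline. A minimal sketch of the resulting call flow, with a hypothetical DummyLayers standing in for the real pipeline:

# Sketch only: DummyLayers is a made-up stand-in for the real layer pipeline.
class DummyLayers:
    def reprocess(self, examples):
        # Re-run stored examples; the result becomes KnowledgeBase.trained.
        return [("pattern", ex) for ex in examples]

    def process(self, knowledge_base, row):
        # Return a "fit" for the row, or None when nothing matches.
        return ("fit", row) if row else None


class MiniKnowledgeBase:
    def __init__(self):
        self.examples = ["a sample row"]
        self.layers = DummyLayers()
        self.trained = None

    def train(self):
        res = self.layers.reprocess(self.examples)
        self.trained = res

    def process(self, row):
        fit = self.layers.process(self, row)
        if fit is None:
            return None
        return fit


kb = MiniKnowledgeBase()
kb.train()
print(kb.process("another row"))  # ('fit', 'another row')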
@@ -1,16 +1,18 @@
 from .layers import tokenization_layer
 from .layers import parsing_layer
+from .layers import parsing
 from .session.org_mode import global_session as session


-def make_yield_pipe(layers, knowledge_base, example):
+def make_yield_pipe(layers, knowledge_base, example, func):
     if len(layers) < 1:
         yield example
         return

-    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example)
+    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
     for input in input_generator:
-        print("-->", input)
-        for d in list(layers[-1].integrate(knowledge_base, input)):
+        session().annotate("[{}] --> {}".format(len(layers), input))
+        for d in list(func(layers[-1], input)):
             yield d
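The new func parameter turns make_yield_pipe into a generic driver: the same recursion now serves both integrate and process, with the caller choosing what each layer does per input. A self-contained toy showing how the recursion composes (the Upper and Split layers are invented for the demonstration):

# Toy version of the yield pipe from this diff, with fake layers.
def make_yield_pipe(layers, knowledge_base, example, func):
    if len(layers) < 1:
        yield example
        return

    # Everything produced by the earlier layers feeds the last layer.
    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
    for input in input_generator:
        for d in list(func(layers[-1], input)):
            yield d


class Upper:
    def process(self, kb, text):
        yield text.upper()


class Split:
    def process(self, kb, text):
        yield from text.split()


pipe = make_yield_pipe([Upper(), Split()], None, "hello world",
                       lambda l, i: l.process(None, i))
print(list(pipe))  # ['HELLO', 'WORLD']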
@@ -25,15 +27,10 @@ class BaseModel:
     ]

     def reprocess(self, examples):
+        for example in examples:
+            self._reprocess_single(example)
+
+    def _reprocess_single(self, example):
+        return
         pattern_examples = []
         for i, sample in enumerate(examples):
             other = examples[:i] + examples[i + 1:]
-            match = get_matching(sample, other)
+            match = parsing.get_matching(sample, other)
             if len(match) > 0:
                 sample = (match, sample[1],)
                 pattern_examples.append(sample)
@@ -41,7 +38,12 @@ class BaseModel:
         return pattern_examples

     def integrate(self, knowledge_base, example):
-        yield from make_yield_pipe(self.layers, knowledge_base, example)
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.integrate(knowledge_base, i))
+
+    def process(self, knowledge_base, example):
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.process(knowledge_base, i))

     def tokenize(self, row, return_one=True):
         return self.tokenization.to_tokens(row)
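integrate and process now differ only in the lambda they hand to the shared pipe. A rough sketch of that adapter pattern in isolation (EchoLayer and drive are hypothetical names, not the project's):

# Sketch of the adapter pattern above; one generic driver, two thin lambdas.
def drive(layers, knowledge_base, example, func):
    for layer in layers:
        for output in func(layer, example):
            example = output  # simplified: keep the last output per layer
    yield example


class EchoLayer:
    def integrate(self, kb, x):
        yield ("integrated", x)

    def process(self, kb, x):
        yield ("processed", x)


kb = object()  # placeholder knowledge base
layers = [EchoLayer()]
print(list(drive(layers, kb, "row", lambda l, i: l.integrate(kb, i))))
print(list(drive(layers, kb, "row", lambda l, i: l.process(kb, i))))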
@@ -9,6 +9,7 @@ from typing import List, Dict
 from ..modifiable_property import ModifiableProperty
 from .. import parameters
 from ..atoms import Atom, a, is_atom
+from .. import knowledge_evaluation

 def make_template(knowledge_base, tokens, parsed):
     matcher = list(tokens)
@@ -97,8 +98,9 @@ def integrate_language(knowledge_base, example):
             result = build_remix_matrix(knowledge_base, tokens, atom, similar)
             if result is not None:
                 break
         else:
-            raise Exception('Similar not found')
+            return

         remix, (start_bounds, end_bounds) = result

         after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
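The for ... else here runs the else branch only when the loop finishes without hitting break; after this commit the no-match case returns quietly instead of raising. A quick illustration of that control flow:

# for/else: the else clause runs only if the loop never breaks.
def first_even(numbers):
    for n in numbers:
        if n % 2 == 0:
            result = n
            break
    else:
        return None  # nothing matched; bail out quietly, as in the diff

    return result


print(first_even([1, 3, 4]))  # 4
print(first_even([1, 3, 5]))  # None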
@@ -8,4 +8,7 @@ class ParsingLayer:
         yield from parsing.integrate_language(knowledge_base, example)

     def train(self, knowledge_base, example):
         assert False

+    def process(self, knowledge_base, input):
+        yield from parsing.get_fit(knowledge_base, input)
@@ -29,14 +29,20 @@ class TokenizationLayer:
     def integrate(self, knowledge_base, data):
         assert knowledge_base is self.knowledge_base

-        print(data)
         assert 'text' in data
-        with session().log("Tokenize: {}".format(data['text'])):
-            for tokens in tokenization.to_tokens(self, data['text']):
-                data_with_row = copy.copy(data)
-                data_with_row['tokens'] = tokens
-                print(data_with_row)
-                yield data_with_row
+        tokens = self.tokenize(data['text'])
+        data_with_row = copy.copy(data)
+        data_with_row['tokens'] = tokens
+        yield data_with_row

+        # with session().log("Tokenize: {}".format(data['text'])):
+        #     for tokens in tokenization.to_tokens(self, data['text']):
+        #         data_with_row = copy.copy(data)
+        #         data_with_row['tokens'] = tokens
+        #         yield data_with_row
+
+    def process(self, knowledge_base, row):
+        yield self.tokenize(row)


     def tokenize(self, row, return_one=True):
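After the simplification, integrate emits exactly one tokenized copy of the incoming dict, and the new process method exposes tokenization to the generic pipe. An approximate, self-contained version (the whitespace tokenize is an assumption; the real layer delegates to its tokenization model):

import copy


class MiniTokenizationLayer:
    def integrate(self, knowledge_base, data):
        # One tokenized copy per input dict, as in the simplified code above.
        assert 'text' in data
        tokens = self.tokenize(data['text'])
        data_with_row = copy.copy(data)
        data_with_row['tokens'] = tokens
        yield data_with_row

    def process(self, knowledge_base, row):
        yield self.tokenize(row)

    def tokenize(self, row, return_one=True):
        # Placeholder tokenizer; stands in for self.tokenization.to_tokens(row).
        return row.split()


layer = MiniTokenizationLayer()
print(list(layer.integrate(None, {'text': 'some input row'})))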