Pass test using layer structure.
This commit is contained in:
parent a444766c7c
commit 1ded981099
@@ -58,7 +58,8 @@ class KnowledgeBase(object):
 
         # Reduce values
         with session().log("reprocessing"):
-            self.layers.reprocess(self.examples)
+            res = self.layers.reprocess(self.examples)
+            self.trained = res
 
         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@@ -69,9 +70,7 @@ class KnowledgeBase(object):
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
         with session().log("Process: {}".format(row)):
-            tokens = self.tokenize(row)
-
-            fit = parsing.get_fit(self, tokens)
+            fit = self.layers.process(self, row)
             if fit is None:
                 return None
 
@@ -1,16 +1,18 @@
 from .layers import tokenization_layer
 from .layers import parsing_layer
+from .layers import parsing
+from .session.org_mode import global_session as session
 
 
-def make_yield_pipe(layers, knowledge_base, example):
+def make_yield_pipe(layers, knowledge_base, example, func):
     if len(layers) < 1:
         yield example
         return
 
-    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example)
+    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
     for input in input_generator:
-        print("-->", input)
-        for d in list(layers[-1].integrate(knowledge_base, input)):
+        session().annotate("[{}] --> {}".format(len(layers), input))
+        for d in list(func(layers[-1], input)):
             yield d
 
 
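For context, make_yield_pipe builds a recursive generator pipeline: the first len(layers) - 1 layers are wired up recursively, every value they yield is fed to the last layer, and the new func argument selects which layer method runs at each stage, so a single pipe can now drive both integrate and process. A minimal self-contained sketch of the pattern, with hypothetical stub layers in place of the real ones and print standing in for session().annotate:

    # Sketch of the yield-pipe pattern; Doubler and FanOut are
    # hypothetical stand-ins for the real layer objects.
    def make_yield_pipe(layers, example, func):
        if len(layers) < 1:
            yield example
            return
        # Build the pipe for all layers but the last, then feed each
        # intermediate value through the last layer.
        for intermediate in make_yield_pipe(layers[:-1], example, func):
            print("[{}] --> {}".format(len(layers), intermediate))
            yield from func(layers[-1], intermediate)

    class Doubler:
        def process(self, value):
            yield value * 2

    class FanOut:
        def process(self, value):  # one input may yield several outputs
            yield value
            yield value + 1

    pipe = make_yield_pipe([Doubler(), FanOut()], 3,
                           lambda layer, v: layer.process(v))
    print(list(pipe))  # [6, 7]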
@@ -25,15 +27,10 @@ class BaseModel:
     ]
 
     def reprocess(self, examples):
-        for example in examples:
-            self._reprocess_single(example)
-
-    def _reprocess_single(self, example):
-        return
         pattern_examples = []
         for i, sample in enumerate(examples):
             other = examples[:i] + examples[i + 1:]
-            match = get_matching(sample, other)
+            match = parsing.get_matching(sample, other)
             if len(match) > 0:
                 sample = (match, sample[1],)
             pattern_examples.append(sample)
@@ -41,7 +38,12 @@ class BaseModel:
         return pattern_examples
 
     def integrate(self, knowledge_base, example):
-        yield from make_yield_pipe(self.layers, knowledge_base, example)
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.integrate(knowledge_base, i))
+
+    def process(self, knowledge_base, example):
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, lambda l, i: l.process(knowledge_base, i))
 
     def tokenize(self, row, return_one=True):
         return self.tokenization.to_tokens(row)
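The two entry points above differ only in the callback handed to make_yield_pipe: integrate routes every intermediate value through layer.integrate(knowledge_base, input), while process routes it through layer.process(knowledge_base, input). A hedged sketch of that dispatch, with a hypothetical stub layer illustrating the two methods the pipe expects each layer to provide:

    # StubLayer is hypothetical; it only illustrates the interface.
    class StubLayer:
        def integrate(self, knowledge_base, example):
            yield ('integrated', example)

        def process(self, knowledge_base, example):
            yield ('processed', example)

    integrate_step = lambda l, i: l.integrate(None, i)
    process_step = lambda l, i: l.process(None, i)

    layer = StubLayer()
    print(list(integrate_step(layer, 'x')))  # [('integrated', 'x')]
    print(list(process_step(layer, 'x')))    # [('processed', 'x')]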
@@ -9,6 +9,7 @@ from typing import List, Dict
 from ..modifiable_property import ModifiableProperty
 from .. import parameters
 from ..atoms import Atom, a, is_atom
+from .. import knowledge_evaluation
 
 def make_template(knowledge_base, tokens, parsed):
     matcher = list(tokens)
@@ -97,8 +98,9 @@ def integrate_language(knowledge_base, example):
             result = build_remix_matrix(knowledge_base, tokens, atom, similar)
             if result is not None:
                 break
+        else:
+            raise Exception('Similar not found')
 
-        return
         remix, (start_bounds, end_bounds) = result
 
         after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
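The added branch reads as Python's for/else construct: the else suite runs only when the loop completes without break, so exhausting every similar candidate without a usable remix matrix now raises instead of falling through (the old stub return beneath it, which made the rest of the function unreachable, is deleted in the same step). A standalone illustration of the construct, unrelated to the real data model:

    def first_even(numbers):
        for n in numbers:
            if n % 2 == 0:
                break
        else:
            # Reached only if no break fired, i.e. no even number exists.
            raise Exception('Similar not found')
        return n

    print(first_even([3, 4, 5]))  # 4
    first_even([1, 3])            # raises Exception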
@@ -9,3 +9,6 @@ class ParsingLayer:
 
     def train(self, knowledge_base, example):
         assert False
+
+    def process(self, knowledge_base, input):
+        yield from parsing.get_fit(knowledge_base, input)
@@ -29,15 +29,21 @@ class TokenizationLayer:
     def integrate(self, knowledge_base, data):
         assert knowledge_base is self.knowledge_base
 
-        print(data)
         assert 'text' in data
-        with session().log("Tokenize: {}".format(data['text'])):
-            for tokens in tokenization.to_tokens(self, data['text']):
+        tokens = self.tokenize(data['text'])
         data_with_row = copy.copy(data)
         data_with_row['tokens'] = tokens
-        print(data_with_row)
         yield data_with_row
 
+        # with session().log("Tokenize: {}".format(data['text'])):
+        #     for tokens in tokenization.to_tokens(self, data['text']):
+        #         data_with_row = copy.copy(data)
+        #         data_with_row['tokens'] = tokens
+        #         yield data_with_row
+
+    def process(self, knowledge_base, row):
+        yield self.tokenize(row)
+
 
     def tokenize(self, row, return_one=True):
         row = row.lower()
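After this change, integrate yields a single tokenized copy of the incoming dict instead of fanning out one result per tokenization candidate; the fan-out variant survives only as the commented block. A minimal sketch of the new shape, with a lowercase whitespace split standing in for the real tokenizer:

    import copy

    class TokenizationSketch:
        # Hypothetical stand-in; the real layer delegates to its
        # tokenization model rather than splitting on whitespace.
        def tokenize(self, row, return_one=True):
            return row.lower().split()

        def integrate(self, knowledge_base, data):
            assert 'text' in data
            tokens = self.tokenize(data['text'])
            data_with_row = copy.copy(data)  # shallow copy keeps other keys
            data_with_row['tokens'] = tokens
            yield data_with_row

    layer = TokenizationSketch()
    print(next(layer.integrate(None, {'text': 'Some Example'})))
    # {'text': 'Some Example', 'tokens': ['some', 'example']}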