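"""Base model that chains the tokenization and parsing layers into a processing pipeline."""
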
from .layers import tokenization_layer
from .layers import parsing_layer
from .layers import parsing
from .session.org_mode import global_session as session


def make_yield_pipe(layers, knowledge_base, example, func):
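    """Recursively turn `layers` into a generator pipeline.

    With no layers left, the original `example` is yielded unchanged.
    Otherwise every partial result from the pipeline over `layers[:-1]`
    is passed through `func(layers[-1], partial_result)` and the results
    are yielded in turn.
    """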
    if len(layers) < 1:
        yield example
        return

    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
    for input in input_generator:
        session().annotate("[{}] --> {}".format(len(layers), input))
        for d in list(func(layers[-1], input)):
            yield d


class BaseModel:
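    """Model that pushes examples through its layers (tokenization, then parsing) in order."""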

    def __init__(self, knowledge_base):
        self.tokenization = tokenization_layer.TokenizationLayer(knowledge_base)
        self.parsing = parsing_layer.ParsingLayer()

        self.layers = [
            self.tokenization,
            self.parsing,
        ]

    def reprocess(self, examples):
        pattern_examples = []
        for i, sample in enumerate(examples):
            other = examples[:i] + examples[i + 1:]
            match = parsing.get_matching(sample, other)
            if len(match) > 0:
                sample = (match, sample[1],)
                pattern_examples.append(sample)

        return pattern_examples

    def integrate(self, knowledge_base, example):
        yield from make_yield_pipe(self.layers, knowledge_base,
                                   example, lambda l, i: l.integrate(knowledge_base, i))

    def process(self, knowledge_base, example):
        yield from make_yield_pipe(self.layers, knowledge_base,
                                   example, lambda l, i: l.process(knowledge_base, i))

    def tokenize(self, row, return_one=True):
        return self.tokenization.to_tokens(row)