from .layers import tokenization_layer
from .layers import parsing_layer


def make_yield_pipe(layers, knowledge_base, example):
    """Recursively chain *layers* into a lazy generator pipeline.

    Every item produced by the pipeline over ``layers[:-1]`` is fed to
    ``layers[-1].integrate(knowledge_base, item)`` and each of its results
    is yielded in order.  With no layers, *example* is yielded unchanged.

    Args:
        layers: sequence of layer objects, each exposing
            ``integrate(knowledge_base, item) -> iterable``.
        knowledge_base: opaque context threaded through to every layer.
        example: initial value fed into the first layer.

    Yields:
        Items produced by the final layer (or *example* itself when
        *layers* is empty).
    """
    # Base case: no layers left, pass the example straight through.
    if not layers:
        yield example
        return

    # Recursive case: run upstream layers lazily, then pipe each of their
    # outputs through the last layer.  ('item' avoids shadowing the builtin
    # 'input'; no list() materialization — the pipeline stays a generator.
    # The previous debug print of each intermediate item was removed.)
    for item in make_yield_pipe(layers[:-1], knowledge_base, example):
        yield from layers[-1].integrate(knowledge_base, item)


class BaseModel:
    """Two-stage pipeline model: tokenization followed by parsing."""

    def __init__(self, knowledge_base):
        """Build the layer pipeline bound to *knowledge_base*."""
        self.tokenization = tokenization_layer.TokenizationLayer(knowledge_base)
        self.parsing = parsing_layer.ParsingLayer()

        # Order matters: tokens must be produced before parsing consumes them.
        self.layers = [
            self.tokenization,
            self.parsing,
        ]

    def reprocess(self, examples):
        """Reprocess each example in *examples* individually.

        Currently a no-op per example — see ``_reprocess_single``.
        """
        for example in examples:
            self._reprocess_single(example)

    def _reprocess_single(self, example):
        """Placeholder for per-example reprocessing; intentionally does nothing.

        NOTE(review): the original body below the early ``return`` was
        unreachable and referenced undefined names (``examples``,
        ``get_matching``); it was removed as dead code.  Behavior is
        unchanged: the method returns ``None`` without touching *example*.
        """
        return

    def integrate(self, knowledge_base, example):
        """Yield every item produced by piping *example* through all layers."""
        yield from make_yield_pipe(self.layers, knowledge_base, example)

    def tokenize(self, row, return_one=True):
        """Tokenize *row* via the tokenization layer.

        ``return_one`` is accepted for interface compatibility but is
        currently ignored — the layer's raw token output is returned as-is.
        """
        return self.tokenization.to_tokens(row)
|