from .layers import tokenization_layer
from .layers import parsing_layer
from .layers import parsing
from .session.org_mode import global_session as session


def make_yield_pipe(layers, knowledge_base, example, func):
    """Recursively thread `example` through `layers`, applying
    `func(layer, item)` at each stage and yielding every result
    produced by the final layer.
    """
    if len(layers) < 1:
        yield example
        return

    # Every output of the earlier layers is fed into the last layer.
    input_generator = make_yield_pipe(layers[:-1], knowledge_base,
                                      example, func)
    for item in input_generator:
        session().annotate("[{}] --> {}".format(len(layers), item))
        for result in list(func(layers[-1], item)):
            yield result


class BaseModel:
    def __init__(self, knowledge_base):
        self.tokenization = tokenization_layer.TokenizationLayer(knowledge_base)
        self.parsing = parsing_layer.ParsingLayer()

        # Layers are applied in this order by make_yield_pipe.
        self.layers = [
            self.tokenization,
            self.parsing,
        ]

    def reprocess(self, examples):
        """Replace each sample's input with the patterns it shares
        with the remaining examples, keeping only samples that match.
        """
        pattern_examples = []
        for i, sample in enumerate(examples):
            other = examples[:i] + examples[i + 1:]
            match = parsing.get_matching(sample, other)
            if len(match) > 0:
                sample = (match, sample[1])
                pattern_examples.append(sample)

        return pattern_examples

    def integrate(self, knowledge_base, example):
        yield from make_yield_pipe(self.layers, knowledge_base, example,
                                   lambda l, i: l.integrate(knowledge_base, i))

    def process(self, knowledge_base, example):
        yield from make_yield_pipe(self.layers, knowledge_base, example,
                                   lambda l, i: l.process(knowledge_base, i))

    def tokenize(self, row, return_one=True):
        # `return_one` is currently unused; kept for API compatibility.
        return self.tokenization.to_tokens(row)
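

# ----------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module).
# `KnowledgeBase` and the example string below are assumptions about the
# surrounding project, not confirmed by this file; the module's relative
# imports also mean it must be loaded as part of its package.
#
#     kb = KnowledgeBase()                        # hypothetical
#     model = BaseModel(kb)
#     for result in model.process(kb, "example input"):
#         print(result)                           # one candidate per pipeline output
# ----------------------------------------------------------------------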