#!/usr/bin/env python
"""Build token matchers and templates from parsed example sentences."""

from functools import reduce
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # KnowledgeBase is referenced only in annotations below, so the import
    # is restricted to type-checking time.
    from knowledge_base import KnowledgeBase


def make_template(knowledge_base: "KnowledgeBase", text, parsed):
    """Split *text* into tokens and cross-reference them with *parsed*.

    Every whitespace-separated token that also appears in *parsed* is
    replaced, in the token list, by ``{'groups': set(...)}`` built from
    ``knowledge_base.knowledge[token]['groups']``; the first matching entry
    of the template (a list copy of *parsed*) is replaced by the token's
    position.

    Returns:
        A ``(tokens, template)`` tuple of lists.

    Raises:
        KeyError: if a token found in *parsed* has no entry (or no
            ``'groups'`` key) in ``knowledge_base.knowledge``.
    """
    tokens = text.split()
    template = list(parsed)
    for position, word in enumerate(tokens):
        if word not in template:
            continue
        print(word, position, template)
        # Only the first occurrence in the template is rewritten; a repeated
        # token will no longer find itself there and stays a plain string.
        template[template.index(word)] = position
        print(knowledge_base)
        tokens[position] = {
            'groups': set(knowledge_base.knowledge[word]['groups']),
        }
    return tokens, template


def integrate_language(knowledge_base: "KnowledgeBase", example):
    """Build the matcher/template pair for a single example.

    *example* is indexed with the keys ``"text"`` and ``"parsed"``; the text
    is lower-cased before being handed to :func:`make_template`.

    Returns:
        The ``(matcher, result)`` tuple produced by :func:`make_template`.
    """
    text = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(knowledge_base, text, parsed)
    print(text)
    print(parsed)
    print()
    return matcher, result


def get_matching(sample, other):
    """Narrow the matcher of *sample* using the compatible entries of *other*.

    ``sample[0]`` is a sequence whose elements are either strings or dicts
    holding a ``'groups'`` set. An entry of *other* is compatible when its
    matcher has the same length and, at every position where the sample has
    a dict, it also has a dict whose groups intersect the sample's groups.

    Returns:
        ``[]`` when no entry of *other* is compatible. Otherwise a list the
        same length as ``sample[0]``: strings are kept as they are, and each
        dict becomes ``{'groups': ...}`` holding the intersection of the
        sample's groups with the groups of every compatible entry.
    """
    matcher = sample[0]
    length = len(matcher)
    candidates = [entry for entry in other if len(entry[0]) == length]

    for index, element in enumerate(matcher):
        if not candidates:
            return []
        if isinstance(element, str):
            continue
        groups = element['groups']
        candidates = [
            entry for entry in candidates
            if not isinstance(entry[0][index], str)
            and entry[0][index]['groups'] & groups
        ]

    # The filter on the last position may have removed every candidate; the
    # in-loop check never sees that, and intersecting over an empty
    # candidate list would otherwise fail.
    if not candidates:
        return []

    return [
        element if isinstance(element, str)
        else {
            'groups': reduce(
                lambda common, found: common & found,
                (entry[0][index]['groups'] for entry in candidates),
                element['groups'],
            ),
        }
        for index, element in enumerate(matcher)
    ]


def reprocess_language_knowledge(knowledge_base, examples):
    """Recompute matchers for the stored examples plus *examples*.

    Each example in ``knowledge_base.examples + examples`` is compared with
    all the others through :func:`get_matching`. When a non-empty match is
    found, ``(match, example[1])`` is collected.

    Returns:
        The list of collected ``(match, example[1])`` tuples.
    """
    examples = knowledge_base.examples + examples

    print('\n'.join(map(str, knowledge_base.examples)))
    print("--")

    pattern_examples = []
    for index, sample in enumerate(examples):
        other = examples[:index] + examples[index + 1:]
        print(sample)
        match = get_matching(sample, other)
        print("->", match)
        # NOTE(review): block nesting was reconstructed from a
        # whitespace-stripped source; collecting only matched samples and
        # printing the separator once per sample is an assumption -- confirm.
        if match:
            pattern_examples.append((match, sample[1]))
        print()
    return pattern_examples