#!/usr/bin/env python
"""Naive example-based language learning helpers.

Templates are token lists where literal words stay as strings and learned
words become wildcard matchers of the form ``{'groups': set_of_groups}``.
"""
from functools import reduce


def make_template(knowledge_base, text, parsed):
    """Build a matcher/template pair from one training example.

    Args:
        knowledge_base: object with a ``knowledge`` dict mapping each known
            word to a dict containing a ``'groups'`` iterable.
        text: raw example text; split on whitespace into tokens.
        parsed: the example's parse; any element equal to a token is
            replaced by that token's position index.

    Returns:
        ``(tokens, matcher, template)`` where ``matcher`` mirrors ``tokens``
        but with parse-referenced words replaced by group wildcards, and
        ``template`` mirrors ``parsed`` with those words replaced by indices.
    """
    tokens = text.split()
    matcher = list(tokens)
    template = list(parsed)
    for i, word in enumerate(tokens):
        if word in template:
            # Link the parse slot to the token position...
            template[template.index(word)] = i
            # ...and generalize the literal token into a group wildcard.
            matcher[i] = {
                'groups': set(knowledge_base.knowledge[word]['groups'])
            }
    return tokens, matcher, template


def integrate_language(knowledge_base, example):
    """Normalize one training example and turn it into a template.

    ``example`` is a dict with ``"text"`` and ``"parsed"`` keys; the text is
    lowercased before templating. Prints the example for debugging and
    returns ``make_template``'s ``(tokens, matcher, template)`` triple.
    """
    text = example["text"].lower()
    parsed = example["parsed"]
    tokens, matcher, result = make_template(knowledge_base, text, parsed)
    print(text)
    print(parsed)
    print()
    return tokens, matcher, result


def get_matching(sample, other):
    """Intersect ``sample`` with all compatible examples in ``other``.

    Args:
        sample: ``(matcher, ast)`` pair; ``matcher`` mixes literal strings
            and ``{'groups': set}`` wildcards.
        other: iterable of ``(matcher, ast)`` pairs to match against.

    Returns:
        A merged matcher (wildcard groups narrowed to the common
        intersection across all surviving candidates), or ``[]`` when no
        candidate is compatible at every wildcard position.
    """
    length = len(sample[0])
    # Only same-length examples can possibly match.
    candidates = [x for x in other if len(x[0]) == length]
    for i, token in enumerate(sample[0]):
        if not candidates:
            return []
        if not isinstance(token, str):
            # Keep only candidates whose wildcard at this slot shares at
            # least one group with the sample's wildcard.
            candidates = [
                x for x in candidates
                if not isinstance(x[0][i], str)
                and x[0][i]['groups'] & token['groups']
            ]
    if not candidates:
        # BUG FIX: the original only checked emptiness at the top of each
        # iteration, so a final-position filter that removed every
        # candidate crashed below with reduce() over an empty sequence.
        return []
    return [
        sample[0][i] if isinstance(sample[0][i], str)
        else {'groups': sample[0][i]['groups'] &
              reduce(lambda a, b: a & b,
                     (c[0][i]['groups'] for c in candidates))}
        for i in range(length)
    ]


def reprocess_language_knowledge(knowledge_base, examples):
    """Re-derive generalized patterns from stored plus new examples.

    Each example is matched against every other example; examples that
    generalize (non-empty ``get_matching`` result) are collected with
    their generalized matcher substituted in. Prints progress for
    debugging and returns the list of ``(matcher, ast)`` patterns.
    """
    examples = knowledge_base.examples + examples
    print('\n'.join(map(str, knowledge_base.examples)))
    print("--")
    pattern_examples = []
    for i, sample in enumerate(examples):
        # Every example except the current one.
        other = examples[:i] + examples[i + 1:]
        print(sample)
        match = get_matching(sample, other)
        print("->", match)
        if match:
            pattern_examples.append((match, sample[1]))
        print()
    return pattern_examples


def get_fit(knowledge, row):
    """Find the first trained pattern that matches ``row``.

    Args:
        knowledge: object with a ``trained`` iterable of ``(matcher, ast)``
            pairs.
        row: raw input text; lowercased and split on whitespace.

    Returns:
        ``(tokens, matcher, ast)`` for the first pattern whose literal
        slots equal the corresponding tokens (wildcard slots match
        anything), or ``None`` when nothing fits.
    """
    tokens = row.lower().split()
    for sample, ast in knowledge.trained:
        if len(sample) != len(tokens):
            continue
        if all(not isinstance(part, str) or part == tokens[i]
               for i, part in enumerate(sample)):
            return tokens, sample, ast
    return None