lang-model/naive-nlu/parsing.py

#!/usr/bin/env python

from functools import reduce

from knowledge_base import KnowledgeBase


def make_template(knowledge_base: KnowledgeBase, text, parsed):
    """Build a (matcher, template) pair from a sentence and its parsed form.

    The text is split on whitespace.  Every token that also occurs as an
    element of ``parsed`` is handled as follows:

    * the first remaining occurrence of the token in a copy of ``parsed``
      is replaced by the token's position in the sentence;
    * the token itself is replaced in the matcher by
      ``{'groups': set(...)}``, built from
      ``knowledge_base.knowledge[token]['groups']``.

    Tokens that do not occur in ``parsed`` stay in the matcher as plain
    strings.  Progress is traced on stdout.

    Args:
        knowledge_base: Object exposing a ``knowledge`` mapping indexed by
            token, each entry holding a ``'groups'`` iterable.
        text: Sentence to split into tokens.
        parsed: Iterable with the parsed representation of ``text``.

    Returns:
        A ``(matcher, template)`` tuple of two lists.
    """
    matcher = text.split()
    structure = list(parsed)

    for position, token in enumerate(matcher):
        if token not in structure:
            continue

        print(token, position, structure)
        # Only the first remaining occurrence is turned into an index.
        slot = structure.index(token)
        structure[slot] = position
        print(knowledge_base)

        known_groups = knowledge_base.knowledge[token]['groups']
        matcher[position] = {'groups': set(known_groups)}

    return matcher, structure


def integrate_language(knowledge_base: KnowledgeBase, example):
    """Turn one training example into a (matcher, template) pair.

    The example's ``"text"`` is lower-cased and handed, together with its
    ``"parsed"`` value, to ``make_template``.  The lower-cased text and the
    parsed form are then echoed to stdout, followed by a blank line.

    Args:
        knowledge_base: Knowledge base forwarded to ``make_template``.
        example: Mapping with ``"text"`` and ``"parsed"`` entries.

    Returns:
        The ``(matcher, template)`` pair produced by ``make_template``.
    """
    lowered = example["text"].lower()
    structure = example["parsed"]

    template_pair = make_template(knowledge_base, lowered, structure)
    matcher, result = template_pair

    for traced in (lowered, structure):
        print(traced)
    print()

    return matcher, result


def get_matching(sample, other):
    """Generalize a sample's matcher using the compatible examples in ``other``.

    An example is a sequence whose first element is a matcher: a list whose
    items are either plain strings (literal tokens) or dicts holding a
    ``'groups'`` set.  An example from ``other`` is compatible when its
    matcher has the same length as the sample's and, at every position where
    the sample has a groups dict, it also has a groups dict sharing at least
    one group with the sample.  Positions where the sample has a string are
    not compared.

    Args:
        sample: Example whose matcher is being generalized.
        other: Iterable of the examples to compare against.

    Returns:
        A new matcher list: the sample's strings are kept as-is and each
        groups dict is narrowed to the groups shared by the sample and every
        compatible example.  An empty list is returned when no example is
        compatible.
    """
    pattern = sample[0]
    size = len(pattern)
    candidates = [entry for entry in other if len(entry[0]) == size]

    for position, token in enumerate(pattern):
        if not candidates:
            return []
        if isinstance(token, str):
            continue
        candidates = [
            entry for entry in candidates
            if not isinstance(entry[0][position], str)
            and entry[0][position]['groups'] & token['groups']
        ]

    # The filter for the last position may have removed every candidate; the
    # in-loop check never sees that, so check again before intersecting.
    if not candidates:
        return []

    matched = []
    for position, token in enumerate(pattern):
        if isinstance(token, str):
            matched.append(token)
            continue
        shared = token['groups']
        for entry in candidates:
            shared = shared & entry[0][position]['groups']
        matched.append({'groups': shared})
    return matched


def reprocess_language_knowledge(knowledge_base, examples):
    """Re-derive the pattern of every example against all the others.

    The examples already stored in ``knowledge_base.examples`` are combined
    with the new ``examples``.  Each combined example is then compared with
    the rest through ``get_matching``; when that yields a non-empty matcher,
    the example's first element is replaced by it (its second element is
    kept), otherwise the example is kept unchanged.  The stored examples and
    every step are traced on stdout.

    Args:
        knowledge_base: Object exposing the stored ``examples``.
        examples: New examples, appended after the stored ones.

    Returns:
        A list with one entry per combined example, in the same order.
    """
    combined = knowledge_base.examples + examples

    stored_dump = '\n'.join(str(stored) for stored in knowledge_base.examples)
    print(stored_dump)
    print("--")

    refined = []
    for index, current in enumerate(combined):
        # Everything except the example under inspection.
        rest = combined[:index] + combined[index + 1:]
        print(current)
        generalized = get_matching(current, rest)
        print("->", generalized)
        if generalized:
            refined.append((generalized, current[1]))
        else:
            refined.append(current)
        print()
    return refined