lang-model/naive-nlu/parsing.py

#!/usr/bin/env python

from functools import reduce


def make_template(knowledge_base, text, parsed):
    """Build a matcher and a positional template from a text and its parse.

    The text is split on whitespace.  Every token that also appears as a
    top-level element of ``parsed`` is replaced in the matcher by a
    ``{'groups': set(...)}`` entry built from
    ``knowledge_base.knowledge[token]['groups']``, and the first remaining
    occurrence of that token in the template is replaced by the token's
    position in the text.

    Returns a ``(tokens, matcher, template)`` tuple, where ``tokens`` is the
    untouched token list.

    Raises ``KeyError`` if a token shared with ``parsed`` has no entry in
    ``knowledge_base.knowledge``.
    """
    tokens = text.split()
    matcher = tokens[:]
    template = list(parsed)

    for position, word in enumerate(tokens):
        if word not in template:
            continue

        # Only the first still-unreplaced occurrence in the template is bound
        # to this position.
        template[template.index(word)] = position
        groups = set(knowledge_base.knowledge[word]['groups'])
        matcher[position] = {'groups': groups}

    return tokens, matcher, template


def integrate_language(knowledge_base, example):
    """Turn one example into a ``(tokens, matcher, result)`` triple.

    ``example`` must provide a ``"text"`` entry (lower-cased before use) and
    a ``"parsed"`` entry; both are handed to ``make_template``.  The
    lower-cased text and the parsed structure are printed to stdout, followed
    by an empty line.
    """
    lowered = example["text"].lower()
    structure = example["parsed"]
    tokens, matcher, result = make_template(knowledge_base, lowered, structure)

    # Debug trace of what was just integrated.
    for item in (lowered, structure):
        print(item)
    print()

    return tokens, matcher, result


def get_matching(sample, other):
    """Generalize the matcher of ``sample`` against compatible examples.

    ``sample[0]`` and each ``entry[0]`` in ``other`` are sequences whose
    elements are either plain strings or dicts holding a ``'groups'`` set.

    An entry of ``other`` is compatible when its matcher has the same length
    as the sample's and, at every position where the sample holds a groups
    dict, it also holds a groups dict sharing at least one group with the
    sample.  Positions where the sample holds a string do not constrain the
    candidates.

    Returns a new matcher list: string positions are copied from the sample,
    group positions hold the intersection of the sample's groups with those
    of every compatible entry.  Returns ``[]`` when no compatible entry
    exists (or the sample matcher is empty).
    """
    pattern = sample[0]
    length = len(pattern)
    candidates = [entry for entry in other if len(entry[0]) == length]

    for index, element in enumerate(pattern):
        if not candidates:
            return []

        if isinstance(element, str):
            continue

        candidates = [
            entry for entry in candidates
            if not isinstance(entry[0][index], str)
            and len(entry[0][index]['groups'] & element['groups']) > 0
        ]

    # The filter on the last position may have removed every candidate; the
    # in-loop check never sees that, so test once more before intersecting.
    if not candidates:
        return []

    generalized = []
    for index, element in enumerate(pattern):
        if isinstance(element, str):
            generalized.append(element)
            continue

        # Use a non-mutating ``&`` so the groups of the inputs stay intact.
        shared = element['groups']
        for entry in candidates:
            shared = shared & entry[0][index]['groups']
        generalized.append({'groups': shared})

    return generalized


def reprocess_language_knowledge(knowledge_base, examples):
    """Re-generalize every known example against all the others.

    The examples already stored in ``knowledge_base.examples`` are
    concatenated with ``examples``.  Each one is then compared with the rest
    through ``get_matching``; when that yields a non-empty matcher, the
    example's first element is replaced by it (its second element is kept).

    Progress is printed to stdout.  Returns the list of resulting examples,
    in the same order as the concatenated input.
    """
    combined = knowledge_base.examples + examples

    print('\n'.join(str(stored) for stored in knowledge_base.examples))
    print("--")

    pattern_examples = []
    for position, current in enumerate(combined):
        # Everything except the example under consideration.
        rest = combined[:position] + combined[position + 1:]

        print(current)
        generalized = get_matching(current, rest)
        print("->", generalized)

        if len(generalized) > 0:
            pattern_examples.append((generalized, current[1]))
        else:
            pattern_examples.append(current)
        print()

    return pattern_examples


def get_fit(knowledge, row):
    """Find the first trained pattern that fits a tokenized row.

    Iterates over the ``(sample, ast)`` pairs in ``knowledge.trained``.  A
    sample fits when it has the same length as ``row`` and every string
    element in it equals the row element at the same position; non-string
    elements of the sample accept any row element.

    Returns the fitting ``(sample, ast)`` pair, or ``None`` if there is none.
    """
    for sample, ast in knowledge.trained:
        if len(sample) != len(row):
            continue

        fits = all(
            not isinstance(expected, str) or expected == actual
            for expected, actual in zip(sample, row)
        )
        if fits:
            return sample, ast

    return None
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00			`#!/usr/bin/env python`

			`from functools import reduce`


Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`def make_template(knowledge_base, text, parsed):`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00			`tokens = text.split()`
Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`matcher = list(tokens)`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00			`template = list(parsed)`
Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`for i in range(len(matcher)):`
			`word = matcher[i]`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00			`if word in template:`
			`template[template.index(word)] = i`
Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`matcher[i] = {`
			`'groups': set(knowledge_base.knowledge[word]['groups'])`
			`}`
			`return tokens, matcher, template`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00

Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`def integrate_language(knowledge_base, example):`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00			`text = example["text"].lower()`
			`parsed = example["parsed"]`
Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`tokens, matcher, result = make_template(knowledge_base, text, parsed)`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00			`print(text)`
			`print(parsed)`
			`print()`
Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00			`return tokens, matcher, result`
Separation of functionalities in modules. 2017-05-11 01:09:39 +02:00

			`def get_matching(sample, other):`
			`l = len(sample[0])`
			`other = list(filter(lambda x: len(x[0]) == l, other))`
			`for i in range(l):`
			`if len(other) == 0:`
			`return []`

			`if not isinstance(sample[0][i], str):`
			`other = list(filter(lambda x: not isinstance(x[0][i], str) and`
			`len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,`
			`other))`

			`return [sample[0][x] if isinstance(sample[0][x], str)`
			`else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,`
			`map(lambda y: y[0][x]['groups'],`
			`other))}`
			`for x`
			`in range(l)]`


			`def reprocess_language_knowledge(knowledge_base, examples):`
			`examples = knowledge_base.examples + examples`

			`print('\n'.join(map(str, knowledge_base.examples)))`
			`print("--")`

			`pattern_examples = []`
			`for i, sample in enumerate(examples):`
			`other = examples[:i] + examples[i + 1:]`
			`print(sample)`
			`match = get_matching(sample, other)`
			`print("->", match)`
			`if len(match) > 0:`
			`sample = (match, sample[1],)`
			`pattern_examples.append(sample)`
			`print()`
			`return pattern_examples`

Integrate elements. * Move interface to KnowledgeBase object. * Connect process and evaluate calls. 2017-05-11 19:54:02 +02:00
			`def get_fit(knowledge, row):`
			`for sample, ast in knowledge.trained:`
			`if len(sample) != len(row):`
			`continue`

			`if all(map(lambda x: (not isinstance(sample[x], str)`
			`or sample[x] == row[x]),`
			`range(len(sample)))):`
			`return sample, ast`
			`else:`
			`return None`