#!/usr/bin/env python
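# Toy language-learning sketch: induce sentence patterns from a few parsed
# examples and a small word-level knowledge base, then try to fit unseen
# sentences against the induced patterns.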

import json
from functools import reduce

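# Training examples: each entry pairs an English sentence with its parse,
# a tuple of (relation, subject, object).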
examples = [
    {
        "text": "icecream is cold",
        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
    },
    {
        "text": "lava is dangerous",
        "parsed": ("exists-property-with-value", 'lava', 'dangerous'),
    },
    {
        "text": "earth is a planet",
        "parsed": ("pertenence-to-group", 'earth', 'planet'),
    },
    {
        "text": "Green is a color",
        "parsed": ("pertenence-to-group", 'green', 'color'),
    },
    {
        "text": "a plane can fly",
        "parsed": ("has-capacity", 'plane', 'fly'),
    },
    {
        "text": "a whale can swim",
        "parsed": ("has-capacity", 'whale', 'swim'),
    },
]

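# Small knowledge base tagging each known word with the groups (part of
# speech / semantic categories) it belongs to.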
knowledge_base = {
    'icecream': {
        "groups": ['noun', 'object', 'comestible', 'sweet'],
    },
    'lava': {
        "groups": ['noun', 'object'],
    },
    'earth': {
        "groups": ['noun', 'object', 'planet'],
    },
    'green': {
        "groups": ['noun', 'color', 'concept'],
    },
    'plane': {
        "groups": ['noun', 'object', 'vehicle', 'fast'],
    },
    'car': {
        "groups": ['noun', 'object', 'vehicle', 'slow-ish'],
    },
    'whale': {
        "groups": ['noun', 'object', 'living-being'],
    },
    'cold': {
        "groups": ['property', 'temperature'],
    },
    'dangerous': {
        "groups": ['property'],
    },
    'planet': {
        "groups": ['noun', 'group'],
    },
    'color': {
        "groups": ['property', 'group'],
    },
    'fly': {
        "groups": ['verb'],
    },
    'swim': {
        "groups": ['verb'],
    },
}


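# Build a (matcher, template) pair from one example: words that occur in the
# parse are replaced by their knowledge-base groups in the token list and by
# their token index in the parse tuple.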
def make_template(text, parsed):
    tokens = text.split()
    template = list(parsed)
    for i in range(len(tokens)):
        word = tokens[i]
        if word in template:
            print(word, i, template)
            template[template.index(word)] = i
            tokens[i] = {'groups': set(knowledge_base[word]['groups'])}
    return tokens, template


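# Ingest a single example: lower-case the sentence, build its matcher and
# template, and echo what was learned.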
def ingest(example):
    text = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(text, parsed)
    print(text)
    print(parsed)
    print()
    return matcher, result


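# Learn a (matcher, template) pair from every training example.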
language_training = []
for example in examples:
    language_training.append(ingest(example))


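# Generalise a matcher against the other matchers of the same length: literal
# words are kept as-is, and at group positions the groups are intersected
# across every compatible example.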
def get_matching(sample, other):
    l = len(sample[0])
    other = list(filter(lambda x: len(x[0]) == l, other))
    for i in range(l):
        if len(other) == 0:
            return []

        if not isinstance(sample[0][i], str):
            other = list(filter(lambda x: not isinstance(x[0][i], str) and
                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
                                other))

    # If the last filter emptied `other`, the reduce() below would fail on an
    # empty sequence, so bail out here as well
    if len(other) == 0:
        return []

    return [sample[0][x] if isinstance(sample[0][x], str)
            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
                                                            map(lambda y: y[0][x]['groups'],
                                                                other))}
            for x
            in range(l)]


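# Print what was learned, then generalise each sample against the others to
# build the final pattern list.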
print('\n'.join(map(str, language_training)))
print("--")
pattern_examples = []
for i, sample in enumerate(language_training):
    other = language_training[:i] + language_training[i + 1:]
    print(sample)
    match = get_matching(sample, other)
    print("->", match)
    if len(match) > 0:
        sample = (match, sample[1],)
    pattern_examples.append(sample)
    print()


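# Find the first learned pattern whose literal words line up with the input
# row; non-literal (group) positions act as wildcards here.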
def get_fit(patterns, row):
    for sample, ast in patterns:
        if len(sample) != len(row):
            continue

        if all(map(lambda x: not isinstance(sample[x], str) or sample[x] == row[x],
                   range(len(sample)))):
            return sample, ast
    return None


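# Try the patterns on sentences that never appeared in the training examples.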
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
    row = test['text'].lower().split()
    fit = get_fit(pattern_examples, row)
    print(test['text'], fit)

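# Optional interactive loop: uncomment to type sentences and check which
# pattern (if any) they fit.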
# while True:
#     row = input('>>> ').lower().split()
#     fit = get_fit(pattern_examples, row)
#     if fit is None:
#         print("No fit")
#     else:
#         print(fit)