#!/usr/bin/env python
"""Toy natural-language pattern learner.

Each training example pairs a sentence with its parsed form.  Words that
appear in the parsed form are replaced by "slots" carrying the word's groups
from ``knowledge_base``; examples with compatible slots are then merged into
more general patterns, and new sentences are matched against those patterns.

Running (or importing) this module trains on ``examples`` and prints the
intermediate results.
"""

import json
from functools import reduce

# Training sentences together with their expected parse:
# (predicate, argument, argument).
examples = [
    {
        "text": "icecream is cold",
        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
    },
    {
        "text": "lava is dangerous",
        "parsed": ("exists-property-with-value", 'lava', 'dangerous')
    },
    {
        "text": "earth is a planet",
        "parsed": ("pertenence-to-group", 'earth', 'planet'),
    },
    {
        "text": "Green is a color",
        "parsed": ("pertenence-to-group", 'green', 'color'),
    },
    {
        "text": "a plane can fly",
        "parsed": ("has-capacity", 'plane', 'fly')
    },
    {
        "text": "a wale can swim",
        "parsed": ("has-capacity", 'wale', 'swim')
    }
]

# Known words and the groups each one belongs to.
knowledge_base = {
    'icecream': {
        "groups": ['noun', 'object', 'comestible', 'sweet'],
    },
    'lava': {
        "groups": ['noun', 'object'],
    },
    'earth': {
        "groups": ['noun', 'object', 'planet'],
    },
    'green': {
        "groups": ['noun', 'color', 'concept'],
    },
    'plane': {
        "groups": ['noun', 'object', 'vehicle', 'fast'],
    },
    'car': {
        "groups": ['noun', 'object', 'vehicle', 'slow-ish'],
    },
    'wale': {
        "groups": ['noun', 'object', 'living-being']
    },
    'cold': {
        "groups": ['property', 'temperature'],
    },
    'dangerous': {
        "groups": ['property'],
    },
    'planet': {
        "groups": ['noun', 'group'],
    },
    'color': {
        "groups": ['property', 'group'],
    },
    'fly': {
        "groups": ['verb'],
    },
    'swim': {
        "groups": ['verb'],
    },
}


def make_template(text, parsed):
    """Turn a sentence and its parse into a (matcher, template) pair.

    Every token of ``text`` that also appears in ``parsed`` becomes a slot
    ``{'groups': set(...)}`` built from ``knowledge_base``, and its occurrence
    in the template is replaced by the token's position in the sentence.

    Args:
        text: Whitespace-separated sentence.
        parsed: Sequence whose elements may name words of the sentence.

    Returns:
        ``(tokens, template)``: the token list with slots substituted, and
        the parse as a list with matched words replaced by token indexes.

    Raises:
        KeyError: If a word shared by sentence and parse is missing from
            ``knowledge_base``.
    """
    tokens = text.split()
    template = list(parsed)
    for position, word in enumerate(tokens):
        if word in template:
            print(word, position, template)
            template[template.index(word)] = position
            tokens[position] = {'groups': set(knowledge_base[word]['groups'])}
    return tokens, template


def ingest(example):
    """Build the (matcher, template) pair for one training example.

    The text is lower-cased before templating; the text and its parse are
    printed as a trace.
    """
    text = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(text, parsed)
    print(text)
    print(parsed)
    print()
    return matcher, result


language_training = []
for example in examples:
    language_training.append(ingest(example))


def get_matching(sample, other):
    """Generalize ``sample`` using the other examples compatible with it.

    A candidate from ``other`` is compatible when it has the same number of
    tokens and, at every position where ``sample`` has a slot, it also has a
    slot sharing at least one group with it.

    Args:
        sample: ``(tokens, template)`` pair as produced by ``ingest``.
        other: Iterable of such pairs to compare against.

    Returns:
        A copy of the sample's token list in which each slot's groups are
        narrowed to those shared with every compatible candidate, or ``[]``
        when no compatible candidate exists.
    """
    tokens = sample[0]
    size = len(tokens)
    candidates = [cand for cand in other if len(cand[0]) == size]

    for idx, token in enumerate(tokens):
        if not candidates:
            return []
        if isinstance(token, str):
            continue
        candidates = [
            cand for cand in candidates
            if not isinstance(cand[0][idx], str)
            and cand[0][idx]['groups'] & token['groups']
        ]

    # The filter for the last slot may have removed every candidate; without
    # this check reduce() below would be called on an empty iterable and
    # raise TypeError.
    if not candidates:
        return []

    merged = []
    for idx, token in enumerate(tokens):
        if isinstance(token, str):
            merged.append(token)
            continue
        shared = reduce(
            lambda a, b: a & b,
            (cand[0][idx]['groups'] for cand in candidates),
        )
        merged.append({'groups': token['groups'] & shared})
    return merged


print('\n'.join(map(str, language_training)))
print("--")
pattern_examples = []
for i, sample in enumerate(language_training):
    other = language_training[:i] + language_training[i + 1:]
    print(sample)
    match = get_matching(sample, other)
    print("->", match)
    if len(match) > 0:
        sample = (match, sample[1],)
        pattern_examples.append(sample)
    print()


def get_fit(patterns, row):
    """Return the first pattern whose literal tokens all match ``row``.

    Only string tokens are compared; slot positions accept any word.

    Args:
        patterns: Iterable of ``(tokens, template)`` pairs.
        row: List of words of the sentence to match.

    Returns:
        The matching ``(tokens, template)`` pair, or ``None`` if no pattern
        of the same length matches.
    """
    for sample, ast in patterns:
        if len(sample) != len(row):
            continue
        if all(
            not isinstance(expected, str) or expected == word
            for expected, word in zip(sample, row)
        ):
            return sample, ast
    return None


for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
    row = test['text'].lower().split()
    fit = get_fit(pattern_examples, row)
    print(test['text'], fit)

# while True:
#     row = input('>>> ').lower().split()
#     fit = get_fit(pattern_examples, row)
#     if fit is None:
#         print("No fit")
#     else:
#         print(fit)