#!/usr/bin/env python
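# Toy language-learning sketch: induce sentence patterns from a few parsed
# examples and a small word-level knowledge base, then try to fit unseen
# sentences against the induced patterns.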

import json
from functools import reduce

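# Training examples: each entry pairs an English sentence with its parse,
# a tuple of (relation, subject, object).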
examples = [
    {
        "text": "icecream is cold",
        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
    },
    {
        "text": "lava is dangerous",
        "parsed": ("exists-property-with-value", 'lava', 'dangerous'),
    },
    {
        "text": "earth is a planet",
        "parsed": ("pertenence-to-group", 'earth', 'planet'),
    },
    {
        "text": "Green is a color",
        "parsed": ("pertenence-to-group", 'green', 'color'),
    },
    {
        "text": "a plane can fly",
        "parsed": ("has-capacity", 'plane', 'fly'),
    },
    {
        "text": "a whale can swim",
        "parsed": ("has-capacity", 'whale', 'swim'),
    },
]

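# Small knowledge base tagging each known word with the groups (part of
# speech / semantic categories) it belongs to.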
knowledge_base = {
    'icecream': {
        "groups": ['noun', 'object', 'comestible', 'sweet'],
    },
    'lava': {
        "groups": ['noun', 'object'],
    },
    'earth': {
        "groups": ['noun', 'object', 'planet'],
    },
    'green': {
        "groups": ['noun', 'color', 'concept'],
    },
    'plane': {
        "groups": ['noun', 'object', 'vehicle', 'fast'],
    },
    'car': {
        "groups": ['noun', 'object', 'vehicle', 'slow-ish'],
    },
    'whale': {
        "groups": ['noun', 'object', 'living-being'],
    },
    'cold': {
        "groups": ['property', 'temperature'],
    },
    'dangerous': {
        "groups": ['property'],
    },
    'planet': {
        "groups": ['noun', 'group'],
    },
    'color': {
        "groups": ['property', 'group'],
    },
    'fly': {
        "groups": ['verb'],
    },
    'swim': {
        "groups": ['verb'],
    },
}


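# Build a (matcher, template) pair from one example: words that occur in the
# parse are replaced by their knowledge-base groups in the token list and by
# their token index in the parse tuple.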
def make_template(text, parsed):
    tokens = text.split()
    template = list(parsed)
    for i in range(len(tokens)):
        word = tokens[i]
        if word in template:
            print(word, i, template)
            template[template.index(word)] = i
            tokens[i] = {'groups': set(knowledge_base[word]['groups'])}
    return tokens, template


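# Ingest a single example: lower-case the sentence, build its matcher and
# template, and echo what was learned.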
def ingest(example):
    text = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(text, parsed)
    print(text)
    print(parsed)
    print()
    return matcher, result


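# Learn a (matcher, template) pair from every training example.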
language_training = []
for example in examples:
    language_training.append(ingest(example))


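# Generalise a matcher against the other matchers of the same length: literal
# words are kept as-is, and at group positions the groups are intersected
# across every compatible example.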
def get_matching(sample, other):
    l = len(sample[0])
    other = list(filter(lambda x: len(x[0]) == l, other))
    for i in range(l):
        if len(other) == 0:
            return []

        if not isinstance(sample[0][i], str):
            other = list(filter(lambda x: not isinstance(x[0][i], str) and
                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
                                other))

    # If the last filter emptied `other`, the reduce() below would fail on an
    # empty sequence, so bail out here as well
    if len(other) == 0:
        return []

    return [sample[0][x] if isinstance(sample[0][x], str)
            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
                                                            map(lambda y: y[0][x]['groups'],
                                                                other))}
            for x
            in range(l)]


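# Print what was learned, then generalise each sample against the others to
# build the final pattern list.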
print('\n'.join(map(str, language_training)))
print("--")
pattern_examples = []
for i, sample in enumerate(language_training):
    other = language_training[:i] + language_training[i + 1:]
    print(sample)
    match = get_matching(sample, other)
    print("->", match)
    if len(match) > 0:
        sample = (match, sample[1],)
    pattern_examples.append(sample)
    print()


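# Find the first learned pattern whose literal words line up with the input
# row; non-literal (group) positions act as wildcards here.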
def get_fit(patterns, row):
    for sample, ast in patterns:
        if len(sample) != len(row):
            continue

        if all(map(lambda x: not isinstance(sample[x], str) or sample[x] == row[x],
                   range(len(sample)))):
            return sample, ast
    return None


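# Try the patterns on sentences that never appeared in the training examples.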
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
    row = test['text'].lower().split()
    fit = get_fit(pattern_examples, row)
    print(test['text'], fit)

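# Optional interactive loop: uncomment to type sentences and check which
# pattern (if any) they fit.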
# while True:
#     row = input('>>> ').lower().split()
#     fit = get_fit(pattern_examples, row)
#     if fit is None:
#         print("No fit")
#     else:
#         print(fit)