Use a single object for both parts of the training.
This commit is contained in:
parent
a1925f5383
commit
5b30713df1
@ -1,63 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import json
|
|
||||||
import collections
|
|
||||||
|
|
||||||
examples = [
|
|
||||||
{
|
|
||||||
"text": "icecream is cold",
|
|
||||||
"parsed": ("exists-property-with-value", 'icecream', 'cold'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "earth is a planet",
|
|
||||||
"parsed": ("pertenence-to-group", 'earth', 'planet'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "Green is a color",
|
|
||||||
"parsed": ("pertenence-to-group", 'green', 'color'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "airplanes do fly",
|
|
||||||
"parsed": ("has-capacity", 'airplane', 'fly')
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
# word -> property -> value store; nested defaultdicts remove key-exists checks.
knowledge_base = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict()))
|
|
||||||
|
|
||||||
|
|
||||||
def property_for_value(value):
    """Return the name of the property that `value` is a value of.

    Only "cold" is known so far (a temperature); any other value yields
    None, matching the facts hard-coded in this script.
    """
    # Bug fix: the original tested `if "cold":`, a non-empty string literal
    # that is always truthy, so EVERY value was reported as a temperature.
    if value == "cold":
        return "temperature"
|
|
||||||
|
|
||||||
|
|
||||||
def exists_property_with_value(subj, value):
    # Record `value` for `subj` under the property that `value` belongs to
    # (uses the module-global `knowledge_base` and `property_for_value`).
    knowledge_base[subj][property_for_value(value)] = value
|
|
||||||
|
|
||||||
|
|
||||||
def pertenence_to_group(subj, group):
    # Record group membership ("earth is a planet") in the global store.
    knowledge_base[subj]["group"] = group
|
|
||||||
|
|
||||||
|
|
||||||
def has_capacity(subj, capacity):
    """Append `capacity` to `subj`'s capacity list in the global store,
    creating the list on first use."""
    # setdefault collapses the check-then-create dance into one call.
    knowledge_base[subj].setdefault("capacities", []).append(capacity)
|
|
||||||
|
|
||||||
|
|
||||||
# Dispatch table: parsed-form tag -> function that folds that kind of fact
# into the knowledge base.
knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
}
|
|
||||||
|
|
||||||
|
|
||||||
def ingest(example):
    """Route one example's parsed form to its ingestion handler."""
    tag, *arguments = example['parsed']
    knowledge_ingestion[tag](*arguments)
|
|
||||||
|
|
||||||
|
|
||||||
# Ingest every training example, then dump the resulting knowledge base.
for example in examples:
    ingest(example)

print(json.dumps(knowledge_base, indent=4, sort_keys=True))
|
|
139
naive-nlu/nlu.py
Normal file
139
naive-nlu/nlu.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
import collections
|
||||||
|
from functools import reduce
|
||||||
|
|
||||||
|
|
||||||
|
# # # # # # # # # # Base representation
|
||||||
|
|
||||||
|
# Immutable container bundling all trainer state — the "single object for
# both parts of the training" that every step threads through.
KnowledgeBase = collections.namedtuple('KnowledgeBase',
                                       [
                                           'examples',   # Language examples
                                           'knowledge',  # Knowledge about the world
                                           'trained',    # Generalized patterns derived from the examples
                                       ])
|
||||||
|
|
||||||
|
# # # # # # # # # # Interpretation
|
||||||
|
|
||||||
|
|
||||||
|
def property_for_value(knowledge_base, value):
    """Look up which property `value` is a value of (its 'as_property' entry)."""
    entry = knowledge_base[value]
    return entry['as_property']
|
||||||
|
|
||||||
|
|
||||||
|
def exists_property_with_value(knowledge_base, subj, value):
    # Record that `subj` has `value` for whatever property `value` denotes.
    # NOTE(review): other call sites pass the KnowledgeBase namedtuple, which
    # does not support string subscripting — confirm this is meant to receive
    # the inner `knowledge` dict instead.
    knowledge_base[subj][property_for_value(knowledge_base, value)] = value
|
||||||
|
|
||||||
|
|
||||||
|
def pertenence_to_group(knowledge_base, subj, group):
    """Record membership: `subj` belongs to `group`."""
    subject_entry = knowledge_base[subj]
    subject_entry["group"] = group
|
||||||
|
|
||||||
|
|
||||||
|
def has_capacity(knowledge_base, subj, capacity):
    """Append `capacity` to `subj`'s capacity list, creating the list on
    first use.

    Mutates `knowledge_base` in place; `knowledge_base[subj]` must already
    be a dict (or dict-like) entry.
    """
    # setdefault replaces the explicit "if key missing: create list" dance.
    knowledge_base[subj].setdefault("capacities", []).append(capacity)
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table: parsed-form tag -> handler that folds that fact into the
# knowledge base.
knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
}
|
||||||
|
|
||||||
|
|
||||||
|
def integrate_information(knowledge_base, example):
    """Dispatch one parsed example to its ingestion handler."""
    tag, *arguments = example['parsed']
    knowledge_ingestion[tag](knowledge_base, *arguments)
|
||||||
|
|
||||||
|
# # # # # # # # # # Parsing
|
||||||
|
|
||||||
|
|
||||||
|
def make_template(knowledge_base: KnowledgeBase, text, parsed):
    """Derive a (matcher, template) pair from a sentence and its parse.

    Each word that also appears in the parsed form is replaced, in the
    template, by its token index, and the token itself is abstracted into
    the set of knowledge groups the word belongs to.
    """
    tokens = text.split()
    template = list(parsed)
    for index, word in enumerate(tokens):
        if word not in template:
            continue
        print(word, index, template)
        template[template.index(word)] = index
        print(knowledge_base)
        tokens[index] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
    return tokens, template
|
||||||
|
|
||||||
|
|
||||||
|
def integrate_language(knowledge_base: KnowledgeBase, example):
    """Turn one training example into a (matcher, result) template pair."""
    lowered = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(knowledge_base, lowered, parsed)
    print(lowered)
    print(parsed)
    print()
    return matcher, result
|
||||||
|
|
||||||
|
|
||||||
|
def train(knowledge_base: KnowledgeBase, examples):
    """Ingest `examples` and return a new KnowledgeBase extended with them
    and with the generalized patterns recomputed."""
    # Parse every example into a template pair.
    parsed_examples = [integrate_language(knowledge_base, example)
                       for example in examples]

    # Generalize across all known examples.
    trained = reprocess_knowledge(knowledge_base, parsed_examples)

    return KnowledgeBase(
        examples=knowledge_base.examples + parsed_examples,
        knowledge=knowledge_base.knowledge,
        trained=trained,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def reprocess_knowledge(knowledge_base, examples):
    """Generalize each example against all the others.

    Every example is matched against the remaining ones; when a match is
    found, the example's matcher is replaced by the generalized pattern.
    """
    all_examples = knowledge_base.examples + examples

    print('\n'.join(map(str, knowledge_base.examples)))
    print("--")

    pattern_examples = []
    for position, sample in enumerate(all_examples):
        rest = all_examples[:position] + all_examples[position + 1:]
        print(sample)
        match = get_matching(sample, rest)
        print("->", match)
        if match:
            sample = (match, sample[1])
        pattern_examples.append(sample)
        print()
    return pattern_examples
|
||||||
|
|
||||||
|
|
||||||
|
def get_matching(sample, other):
    """Intersect `sample` with every same-length candidate in `other`.

    String positions are kept verbatim; group-set positions are narrowed to
    the groups shared by the sample and all surviving candidates.  Returns
    [] when no candidate survives the filtering.
    """
    length = len(sample[0])
    candidates = [entry for entry in other if len(entry[0]) == length]

    for position in range(length):
        if not candidates:
            return []
        element = sample[0][position]
        if not isinstance(element, str):
            candidates = [entry for entry in candidates
                          if not isinstance(entry[0][position], str)
                          and entry[0][position]['groups'] & element['groups']]

    merged = []
    for position in range(length):
        element = sample[0][position]
        if isinstance(element, str):
            merged.append(element)
        else:
            shared = reduce(lambda a, b: a & b,
                            map(lambda entry: entry[0][position]['groups'],
                                candidates))
            merged.append({'groups': element['groups'] & shared})
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def get_fit(knowledge: KnowledgeBase, row):
    """Return the first trained (sample, ast) pair compatible with `row`,
    or None.

    A pattern fits when it has the same length as `row` and every literal
    (string) position equals the corresponding word; abstracted positions
    accept any word.
    """
    for sample, ast in knowledge.trained:
        if len(sample) != len(row):
            continue
        literals_match = all(pattern == word
                             for pattern, word in zip(sample, row)
                             if isinstance(pattern, str))
        if literals_match:
            return sample, ast
    return None
|
88
naive-nlu/test.py
Normal file
88
naive-nlu/test.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
import nlu
|
||||||
|
|
||||||
|
examples = [
|
||||||
|
{
|
||||||
|
"text": "icecream is cold",
|
||||||
|
"parsed": ("exists-property-with-value", 'icecream', 'cold'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "lava is dangerous",
|
||||||
|
"parsed": ("exists-property-with-value", 'lava', 'dangerous')
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "earth is a planet",
|
||||||
|
"parsed": ("pertenence-to-group", 'earth', 'planet'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "Green is a color",
|
||||||
|
"parsed": ("pertenence-to-group", 'green', 'color'),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "a plane can fly",
|
||||||
|
"parsed": ("has-capacity", 'plane', 'fly')
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "a wale can swim",
|
||||||
|
"parsed": ("has-capacity", 'wale', 'swim')
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
base_knowledge = {
|
||||||
|
'icecream': {
|
||||||
|
"groups": ['noun', 'object', 'comestible', 'sweet'],
|
||||||
|
},
|
||||||
|
'lava': {
|
||||||
|
"groups": ['noun', 'object'],
|
||||||
|
},
|
||||||
|
'earth': {
|
||||||
|
"groups": ['noun', 'object', 'planet'],
|
||||||
|
},
|
||||||
|
'green': {
|
||||||
|
"groups": ['noun', 'color', 'concept'],
|
||||||
|
},
|
||||||
|
'plane': {
|
||||||
|
"groups": ['noun', 'object', 'vehicle', 'fast'],
|
||||||
|
},
|
||||||
|
'car': {
|
||||||
|
"groups": ['noun', 'object', 'vehicle', 'slow-ish'],
|
||||||
|
},
|
||||||
|
'wale': {
|
||||||
|
"groups": ['noun', 'object', 'living-being']
|
||||||
|
},
|
||||||
|
'cold': {
|
||||||
|
"groups": ['property', 'temperature'],
|
||||||
|
},
|
||||||
|
'dangerous': {
|
||||||
|
"groups": ['property'],
|
||||||
|
},
|
||||||
|
'planet': {
|
||||||
|
"groups": ['noun', 'group'],
|
||||||
|
},
|
||||||
|
'color': {
|
||||||
|
"groups": ['property', 'group'],
|
||||||
|
},
|
||||||
|
'fly': {
|
||||||
|
"groups": ['verb'],
|
||||||
|
},
|
||||||
|
'swim': {
|
||||||
|
"groups": ['verb'],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    # Start from an empty example/pattern store seeded with the hand-written
    # word -> groups knowledge.
    knowledge = nlu.KnowledgeBase(
        examples=[],
        trained=[],
        knowledge=base_knowledge
    )

    knowledge = nlu.train(knowledge, examples)
    # Try to fit two unseen sentences against the trained patterns.
    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
        row = test['text'].lower().split()
        fit = nlu.get_fit(knowledge, row)
        print(test['text'], fit)
|
158
parsing_test.py
158
parsing_test.py
@ -1,158 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import json
|
|
||||||
from functools import reduce
|
|
||||||
|
|
||||||
examples = [
|
|
||||||
{
|
|
||||||
"text": "icecream is cold",
|
|
||||||
"parsed": ("exists-property-with-value", 'icecream', 'cold'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "lava is dangerous",
|
|
||||||
"parsed": ("exists-property-with-value", 'lava', 'dangerous')
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "earth is a planet",
|
|
||||||
"parsed": ("pertenence-to-group", 'earth', 'planet'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "Green is a color",
|
|
||||||
"parsed": ("pertenence-to-group", 'green', 'color'),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "a plane can fly",
|
|
||||||
"parsed": ("has-capacity", 'plane', 'fly')
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"text": "a wale can swim",
|
|
||||||
"parsed": ("has-capacity", 'wale', 'swim')
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
knowledge_base = {
|
|
||||||
'icecream': {
|
|
||||||
"groups": ['noun', 'object', 'comestible', 'sweet'],
|
|
||||||
},
|
|
||||||
'lava': {
|
|
||||||
"groups": ['noun', 'object'],
|
|
||||||
},
|
|
||||||
'earth': {
|
|
||||||
"groups": ['noun', 'object', 'planet'],
|
|
||||||
},
|
|
||||||
'green': {
|
|
||||||
"groups": ['noun', 'color', 'concept'],
|
|
||||||
},
|
|
||||||
'plane': {
|
|
||||||
"groups": ['noun', 'object', 'vehicle', 'fast'],
|
|
||||||
},
|
|
||||||
'car': {
|
|
||||||
"groups": ['noun', 'object', 'vehicle', 'slow-ish'],
|
|
||||||
},
|
|
||||||
'wale': {
|
|
||||||
"groups": ['noun', 'object', 'living-being']
|
|
||||||
},
|
|
||||||
'cold': {
|
|
||||||
"groups": ['property', 'temperature'],
|
|
||||||
},
|
|
||||||
'dangerous': {
|
|
||||||
"groups": ['property'],
|
|
||||||
},
|
|
||||||
'planet': {
|
|
||||||
"groups": ['noun', 'group'],
|
|
||||||
},
|
|
||||||
'color': {
|
|
||||||
"groups": ['property', 'group'],
|
|
||||||
},
|
|
||||||
'fly': {
|
|
||||||
"groups": ['verb'],
|
|
||||||
},
|
|
||||||
'swim': {
|
|
||||||
"groups": ['verb'],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def make_template(text, parsed):
    """Build a (tokens, template) pair from a sentence and its parse.

    Words shared between the text and the parsed form become positional
    indices in the template, and the matching token is abstracted to its
    knowledge groups (via the module-global `knowledge_base`).
    """
    tokens = text.split()
    template = list(parsed)
    for i in range(len(tokens)):
        word = tokens[i]
        if word in template:
            print(word, i, template)  # debug trace of the substitution
            template[template.index(word)] = i
            # abstract the literal word into its semantic groups
            tokens[i] = {'groups': set(knowledge_base[word]['groups'])}
    return tokens, template
|
|
||||||
|
|
||||||
|
|
||||||
def ingest(example):
    """Lower-case the example text and derive its matcher/result pair."""
    lowered = example["text"].lower()
    parsed = example["parsed"]
    matcher, result = make_template(lowered, parsed)
    print(lowered)
    print(parsed)
    print()
    return matcher, result
|
|
||||||
|
|
||||||
|
|
||||||
language_training = []
|
|
||||||
for example in examples:
|
|
||||||
language_training.append(ingest(example))
|
|
||||||
|
|
||||||
|
|
||||||
def get_matching(sample, other):
    """Intersect `sample` with every same-length candidate in `other`.

    String positions are kept verbatim; group-set positions are narrowed to
    the groups shared by the sample and all surviving candidates.  Returns
    [] when no candidate survives the filtering.
    """
    length = len(sample[0])
    candidates = [entry for entry in other if len(entry[0]) == length]

    for position in range(length):
        if not candidates:
            return []
        element = sample[0][position]
        if not isinstance(element, str):
            candidates = [entry for entry in candidates
                          if not isinstance(entry[0][position], str)
                          and entry[0][position]['groups'] & element['groups']]

    merged = []
    for position in range(length):
        element = sample[0][position]
        if isinstance(element, str):
            merged.append(element)
        else:
            shared = reduce(lambda a, b: a & b,
                            map(lambda entry: entry[0][position]['groups'],
                                candidates))
            merged.append({'groups': element['groups'] & shared})
    return merged
|
|
||||||
|
|
||||||
|
|
||||||
print('\n'.join(map(str, language_training)))
|
|
||||||
print("--")
|
|
||||||
pattern_examples = []
|
|
||||||
for i, sample in enumerate(language_training):
|
|
||||||
other = language_training[:i] + language_training[i + 1:]
|
|
||||||
print(sample)
|
|
||||||
match = get_matching(sample, other)
|
|
||||||
print("->", match)
|
|
||||||
if len(match) > 0:
|
|
||||||
sample = (match, sample[1],)
|
|
||||||
pattern_examples.append(sample)
|
|
||||||
print()
|
|
||||||
|
|
||||||
def get_fit(patterns, row):
    """Return the first (sample, ast) pattern compatible with `row`, or None.

    A pattern fits when it has the same length as `row` and every literal
    (string) position equals the corresponding word; abstracted positions
    accept any word.
    """
    for sample, ast in patterns:
        if len(sample) != len(row):
            continue
        literals_match = all(pattern == word
                             for pattern, word in zip(sample, row)
                             if isinstance(pattern, str))
        if literals_match:
            return sample, ast
    return None
|
|
||||||
|
|
||||||
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
|
|
||||||
row = test['text'].lower().split()
|
|
||||||
fit = get_fit(pattern_examples, row)
|
|
||||||
print(test['text'], fit)
|
|
||||||
|
|
||||||
# while True:
|
|
||||||
# row = input('>>> ').lower().split()
|
|
||||||
# fit = get_fit(pattern_examples, row)
|
|
||||||
# if fit is None:
|
|
||||||
# print("No fit")
|
|
||||||
# else:
|
|
||||||
# print(fit)
|
|
Loading…
Reference in New Issue
Block a user