Separation of functionalities in modules.
parent 5b30713df1
commit edc3cb97ab
naive-nlu/knowledge_base.py (new file, 8 lines)
@@ -0,0 +1,8 @@
+import collections
+
+KnowledgeBase = collections.namedtuple('KnowledgeBase',
+                                       [
+                                           'examples',   # Language examples
+                                           'knowledge',  # Knowledge about the world
+                                           'trained',
+                                       ])
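
Note (not part of the commit): a minimal sketch of how the new module could be used, starting from an empty base.

    from knowledge_base import KnowledgeBase

    kb = KnowledgeBase(examples=[], knowledge={}, trained=[])
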
naive-nlu/knowledge_evaluation.py (new file, 33 lines)
@@ -0,0 +1,33 @@
+from knowledge_base import KnowledgeBase
+
+
+def property_for_value(knowledge_base: KnowledgeBase, value):
+    return knowledge_base[value]['as_property']
+
+
+def exists_property_with_value(knowledge_base: KnowledgeBase, subj, value):
+
+    knowledge_base[subj][property_for_value(knowledge_base, value)] = value
+
+
+def pertenence_to_group(knowledge_base: KnowledgeBase, subj, group):
+    knowledge_base[subj]["group"] = group
+
+
+def has_capacity(knowledge_base: KnowledgeBase, subj, capacity):
+    if "capacities" not in knowledge_base[subj]:
+        knowledge_base[subj]["capacities"] = []
+    knowledge_base[subj]["capacities"].append(capacity)
+
+
+knowledge_ingestion = {
+    "exists-property-with-value": exists_property_with_value,
+    "pertenence-to-group": pertenence_to_group,
+    "has-capacity": has_capacity,
+}
+
+
+def integrate_information(knowledge_base: KnowledgeBase, example):
+    method = example['parsed'][0]
+    args = example['parsed'][1:]
+    knowledge_ingestion[method](knowledge_base, *args)
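
Note (not part of the commit): an illustrative call, assuming these functions are handed the mutable knowledge dict (rather than the whole namedtuple) and that an entry such as 'red' carries an 'as_property' key.

    import knowledge_evaluation

    knowledge = {
        'red': {'as_property': 'color'},
        'apple': {},
    }
    example = {'parsed': ('exists-property-with-value', 'apple', 'red')}
    knowledge_evaluation.integrate_information(knowledge, example)
    # knowledge['apple'] is now {'color': 'red'}
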
naive-nlu/nlu.py (148 changed lines)
@@ -1,130 +1,6 @@
-import collections
-from functools import reduce
-
-
-# # # # # # # # # # Base representation
-
-KnowledgeBase = collections.namedtuple('KnowledgeBase',
-                                       [
-                                           'examples',   # Language examples
-                                           'knowledge',  # Knowledge about the world
-                                           'trained',
-                                       ])
-
-# # # # # # # # # # Interpretation
-
-
-def property_for_value(knowledge_base, value):
-    return knowledge_base[value]['as_property']
-
-
-def exists_property_with_value(knowledge_base, subj, value):
-
-    knowledge_base[subj][property_for_value(knowledge_base, value)] = value
-
-
-def pertenence_to_group(knowledge_base, subj, group):
-    knowledge_base[subj]["group"] = group
-
-
-def has_capacity(knowledge_base, subj, capacity):
-    if "capacities" not in knowledge_base[subj]:
-        knowledge_base[subj]["capacities"] = []
-    knowledge_base[subj]["capacities"].append(capacity)
-
-
-knowledge_ingestion = {
-    "exists-property-with-value": exists_property_with_value,
-    "pertenence-to-group": pertenence_to_group,
-    "has-capacity": has_capacity,
-}
-
-
-def integrate_information(knowledge_base, example):
-    method = example['parsed'][0]
-    args = example['parsed'][1:]
-    knowledge_ingestion[method](knowledge_base, *args)
-
-# # # # # # # # # # Parsing
-
-
-def make_template(knowledge_base: KnowledgeBase, text, parsed):
-    tokens = text.split()
-    template = list(parsed)
-    for i in range(len(tokens)):
-        word = tokens[i]
-        if word in template:
-            print(word, i, template)
-            template[template.index(word)] = i
-            print(knowledge_base)
-            tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
-    return tokens, template
-
-
-def integrate_language(knowledge_base: KnowledgeBase, example):
-    text = example["text"].lower()
-    parsed = example["parsed"]
-    matcher, result = make_template(knowledge_base, text, parsed)
-    print(text)
-    print(parsed)
-    print()
-    return matcher, result
-
-
-def train(knowledge_base: KnowledgeBase, examples):
-
-    # Parse everything
-    parsed_examples = []
-    for example in examples:
-        parsed_examples.append(integrate_language(knowledge_base, example))
-
-    # Reduce values
-    trained = reprocess_knowledge(knowledge_base, parsed_examples)
-
-    return KnowledgeBase(
-        knowledge=knowledge_base.knowledge,
-        examples=knowledge_base.examples + parsed_examples,
-        trained=trained,
-    )
-
-
-def reprocess_knowledge(knowledge_base, examples):
-    examples = knowledge_base.examples + examples
-
-    print('\n'.join(map(str, knowledge_base.examples)))
-    print("--")
-
-    pattern_examples = []
-    for i, sample in enumerate(examples):
-        other = examples[:i] + examples[i + 1:]
-        print(sample)
-        match = get_matching(sample, other)
-        print("->", match)
-        if len(match) > 0:
-            sample = (match, sample[1],)
-        pattern_examples.append(sample)
-        print()
-    return pattern_examples
-
-
-def get_matching(sample, other):
-    l = len(sample[0])
-    other = list(filter(lambda x: len(x[0]) == l, other))
-    for i in range(l):
-        if len(other) == 0:
-            return []
-
-        if not isinstance(sample[0][i], str):
-            other = list(filter(lambda x: not isinstance(x[0][i], str) and
-                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
-                                other))
-
-    return [sample[0][x] if isinstance(sample[0][x], str)
-            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
-                                                            map(lambda y: y[0][x]['groups'],
-                                                                other))}
-            for x
-            in range(l)]
+from knowledge_base import KnowledgeBase
+import knowledge_evaluation
+import parsing
+
+
 def get_fit(knowledge: KnowledgeBase, row):
@@ -137,3 +13,21 @@ def get_fit(knowledge: KnowledgeBase, row):
             return sample, ast
     else:
         return None
+
+
+def train(knowledge_base: KnowledgeBase, examples):
+
+    # Parse everything
+    parsed_examples = []
+    for example in examples:
+        parsed_examples.append(parsing.integrate_language(knowledge_base, example))
+
+    # Reduce values
+    trained = parsing.reprocess_language_knowledge(knowledge_base, parsed_examples)
+
+    return KnowledgeBase(
+        knowledge=knowledge_base.knowledge,
+        examples=knowledge_base.examples + parsed_examples,
+        trained=trained,
+    )
+
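
Note (not part of the commit): a hypothetical end-to-end call of the refactored nlu.train. It assumes every word that appears in the parsed form has a 'groups' entry in the knowledge dict; the sentence and group names are made up.

    from knowledge_base import KnowledgeBase
    import nlu

    kb = KnowledgeBase(
        examples=[],
        knowledge={
            'icecream': {'groups': {'noun'}},
            'cold': {'groups': {'property'}},
        },
        trained=[],
    )
    kb = nlu.train(kb, [
        {'text': 'icecream is cold',
         'parsed': ('exists-property-with-value', 'icecream', 'cold')},
    ])
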
naive-nlu/parsing.py (new file, 69 lines)
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+from functools import reduce
+
+from knowledge_base import KnowledgeBase
+
+
+def make_template(knowledge_base: KnowledgeBase, text, parsed):
+    tokens = text.split()
+    template = list(parsed)
+    for i in range(len(tokens)):
+        word = tokens[i]
+        if word in template:
+            print(word, i, template)
+            template[template.index(word)] = i
+            print(knowledge_base)
+            tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
+    return tokens, template
+
+
+def integrate_language(knowledge_base: KnowledgeBase, example):
+    text = example["text"].lower()
+    parsed = example["parsed"]
+    matcher, result = make_template(knowledge_base, text, parsed)
+    print(text)
+    print(parsed)
+    print()
+    return matcher, result
+
+
+
+def get_matching(sample, other):
+    l = len(sample[0])
+    other = list(filter(lambda x: len(x[0]) == l, other))
+    for i in range(l):
+        if len(other) == 0:
+            return []
+
+        if not isinstance(sample[0][i], str):
+            other = list(filter(lambda x: not isinstance(x[0][i], str) and
+                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
+                                other))
+
+    return [sample[0][x] if isinstance(sample[0][x], str)
+            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
+                                                            map(lambda y: y[0][x]['groups'],
+                                                                other))}
+            for x
+            in range(l)]
+
+
+def reprocess_language_knowledge(knowledge_base, examples):
+    examples = knowledge_base.examples + examples
+
+    print('\n'.join(map(str, knowledge_base.examples)))
+    print("--")
+
+    pattern_examples = []
+    for i, sample in enumerate(examples):
+        other = examples[:i] + examples[i + 1:]
+        print(sample)
+        match = get_matching(sample, other)
+        print("->", match)
+        if len(match) > 0:
+            sample = (match, sample[1],)
+        pattern_examples.append(sample)
+        print()
+    return pattern_examples
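
Note (not part of the commit): an illustrative run of make_template with hypothetical knowledge entries, showing how words found in the parsed form are replaced by their group sets in the matcher and by token positions in the template.

    from knowledge_base import KnowledgeBase
    import parsing

    kb = KnowledgeBase(
        examples=[],
        knowledge={'earth': {'groups': {'object'}},
                   'planet': {'groups': {'group-name'}}},
        trained=[],
    )
    matcher, template = parsing.make_template(
        kb, "earth is a planet",
        ('pertenence-to-group', 'earth', 'planet'))
    # matcher  -> [{'groups': {'object'}}, 'is', 'a', {'groups': {'group-name'}}]
    # template -> ['pertenence-to-group', 0, 3]
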