Integrate elements.
* Move interface to KnowledgeBase object.
* Connect process and evaluate calls.
This commit is contained in:
parent
edc3cb97ab
commit
e42ef8f415
@ -1,8 +1,59 @@
|
|||||||
import collections
|
import copy
|
||||||
|
|
||||||
KnowledgeBase = collections.namedtuple('KnowledgeBase',
|
import parsing
|
||||||
[
|
import knowledge_evaluation
|
||||||
'examples', # Language examples
|
|
||||||
'knowledge', # Knowledge about the world
|
|
||||||
'trained',
|
def diff_knowledge(before, after):
|
||||||
])
|
import jsondiff
|
||||||
|
return jsondiff.diff(before, after)
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeBase(object):
    """Bundle world knowledge, language examples and trained patterns.

    Attributes:
        knowledge: Shallow copy of the mapping passed to the constructor;
            it is handed to ``knowledge_evaluation.integrate_information``
            by both ``train`` and ``process``.
        examples: List of ``(decomposition, inferred_tree)`` pairs
            accumulated by ``train``.
        trained: Result of the latest
            ``parsing.reprocess_language_knowledge`` call.
    """

    def __init__(self, knowledge, examples=None, trained=None):
        """Store shallow copies of the given containers.

        Args:
            knowledge: Initial knowledge mapping.
            examples: Optional initial examples; defaults to an empty list.
            trained: Optional initial trained patterns; defaults to an
                empty list.
        """
        self.knowledge = copy.copy(knowledge)
        # ``None`` sentinels instead of ``[]`` defaults: no list object is
        # shared between calls, even before the copy is taken.
        self.examples = [] if examples is None else copy.copy(examples)
        self.trained = [] if trained is None else copy.copy(trained)

    def train(self, examples):
        """Integrate every example and rebuild the trained patterns.

        Args:
            examples: Iterable of examples accepted by
                ``parsing.integrate_language``.

        Returns:
            A zero-argument callable that, when invoked, returns
            ``diff_knowledge`` of the knowledge snapshots taken before and
            after training.
        """
        knowledge_before = copy.deepcopy(self.knowledge)

        # Parse everything
        parsed_examples = []
        for example in examples:
            tokens, decomposition, inferred_tree = parsing.integrate_language(
                self, example)
            print(tokens)
            knowledge_evaluation.integrate_information(self.knowledge, {
                "elements": tokens,
                "decomposition": decomposition,
                "parsed": inferred_tree,
            })
            parsed_examples.append((decomposition, inferred_tree))

        # Reduce values
        trained = parsing.reprocess_language_knowledge(self, parsed_examples)

        self.examples += parsed_examples
        self.trained = trained

        knowledge_after = copy.deepcopy(self.knowledge)

        def knowledge_diff_getter():
            # Deferred so the diff is only computed if the caller asks.
            return diff_knowledge(knowledge_before, knowledge_after)

        return knowledge_diff_getter

    def process(self, row):
        """Fit ``row`` against the trained patterns and integrate it.

        Args:
            row: Sequence of tokens, passed to ``parsing.get_fit`` and
                forwarded as the ``"elements"`` entry.

        Returns:
            A ``(result, knowledge_diff_getter)`` tuple: ``result`` is
            whatever ``knowledge_evaluation.integrate_information``
            returned, and the getter is a zero-argument callable returning
            the knowledge diff caused by this call.

        Raises:
            TypeError: NOTE(review): ``parsing.get_fit`` returns ``None``
                when no trained sample matches, and that value is unpacked
                directly below -- confirm whether callers rely on this.
        """
        knowledge_before = copy.deepcopy(self.knowledge)
        decomposition, inferred_tree = parsing.get_fit(self, row)
        result = knowledge_evaluation.integrate_information(self.knowledge, {
            "elements": row,
            "decomposition": decomposition,
            "parsed": inferred_tree,
        })
        knowledge_after = copy.deepcopy(self.knowledge)

        def knowledge_diff_getter():
            # Deferred so the diff is only computed if the caller asks.
            return diff_knowledge(knowledge_before, knowledge_after)

        return result, knowledge_diff_getter
|
||||||
|
@ -1,23 +1,47 @@
|
|||||||
from knowledge_base import KnowledgeBase
|
def resolve(elements, value):
    """Return the element that ``value`` indexes, or ``value`` itself.

    Args:
        elements: Indexable container looked up when ``value`` is an
            ``int``.
        value: Either an ``int`` index into ``elements`` or any other
            object.

    Returns:
        ``elements[value]`` when ``value`` is an ``int``; otherwise
        ``value`` unchanged (``elements`` is not touched in that case).
    """
    if isinstance(value, int):
        return elements[value]
    return value
|
||||||
|
|
||||||
|
|
||||||
def property_for_value(knowledge_base: KnowledgeBase, value):
|
def property_for_value(knowledge_base, value):
|
||||||
|
print(value)
|
||||||
|
print(knowledge_base[value])
|
||||||
return knowledge_base[value]['as_property']
|
return knowledge_base[value]['as_property']
|
||||||
|
|
||||||
|
|
||||||
def exists_property_with_value(knowledge_base: KnowledgeBase, subj, value):
|
def exists_property_with_value(knowledge_base, elements, subj, value):
|
||||||
|
subj = resolve(elements, subj)
|
||||||
|
value = resolve(elements, value)
|
||||||
|
|
||||||
|
if subj not in knowledge_base:
|
||||||
|
knowledge_base[subj] = {}
|
||||||
|
|
||||||
knowledge_base[subj][property_for_value(knowledge_base, value)] = value
|
knowledge_base[subj][property_for_value(knowledge_base, value)] = value
|
||||||
|
|
||||||
|
|
||||||
def pertenence_to_group(knowledge_base: KnowledgeBase, subj, group):
|
def pertenence_to_group(knowledge_base, elements, subj, group):
|
||||||
knowledge_base[subj]["group"] = group
|
subj = resolve(elements, subj)
|
||||||
|
group = resolve(elements, group)
|
||||||
|
|
||||||
|
if subj not in knowledge_base:
|
||||||
|
knowledge_base[subj] = {}
|
||||||
|
|
||||||
|
if "groups" not in knowledge_base[subj]:
|
||||||
|
knowledge_base[subj]["groups"] = set()
|
||||||
|
knowledge_base[subj]["groups"].add(group)
|
||||||
|
|
||||||
|
|
||||||
def has_capacity(knowledge_base: KnowledgeBase, subj, capacity):
|
def has_capacity(knowledge_base, elements, subj, capacity):
|
||||||
|
subj = resolve(elements, subj)
|
||||||
|
capacity = resolve(elements, capacity)
|
||||||
|
|
||||||
|
if subj not in knowledge_base:
|
||||||
|
knowledge_base[subj] = {}
|
||||||
|
|
||||||
if "capacities" not in knowledge_base[subj]:
|
if "capacities" not in knowledge_base[subj]:
|
||||||
knowledge_base[subj]["capacities"] = []
|
knowledge_base[subj]["capacities"] = set()
|
||||||
knowledge_base[subj]["capacities"].append(capacity)
|
knowledge_base[subj]["capacities"].add(capacity)
|
||||||
|
|
||||||
|
|
||||||
knowledge_ingestion = {
|
knowledge_ingestion = {
|
||||||
@ -27,7 +51,9 @@ knowledge_ingestion = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def integrate_information(knowledge_base: KnowledgeBase, example):
|
def integrate_information(knowledge_base, example):
|
||||||
method = example['parsed'][0]
|
method = example['parsed'][0]
|
||||||
args = example['parsed'][1:]
|
args = example['parsed'][1:]
|
||||||
knowledge_ingestion[method](knowledge_base, *args)
|
elements = example.get('elements', None)
|
||||||
|
|
||||||
|
knowledge_ingestion[method](knowledge_base, elements, *args)
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
from knowledge_base import KnowledgeBase
|
|
||||||
import knowledge_evaluation
|
|
||||||
import parsing
|
|
||||||
|
|
||||||
|
|
||||||
def get_fit(knowledge: KnowledgeBase, row):
|
|
||||||
for sample, ast in knowledge.trained:
|
|
||||||
if len(sample) != len(row):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if all(map(lambda x: not isinstance(sample[x], str) or sample[x] == row[x],
|
|
||||||
range(len(sample)))):
|
|
||||||
return sample, ast
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def train(knowledge_base: KnowledgeBase, examples):
|
|
||||||
|
|
||||||
# Parse everything
|
|
||||||
parsed_examples = []
|
|
||||||
for example in examples:
|
|
||||||
parsed_examples.append(parsing.integrate_language(knowledge_base, example))
|
|
||||||
|
|
||||||
# Reduce values
|
|
||||||
trained = parsing.reprocess_language_knowledge(knowledge_base, parsed_examples)
|
|
||||||
|
|
||||||
return KnowledgeBase(
|
|
||||||
knowledge=knowledge_base.knowledge,
|
|
||||||
examples=knowledge_base.examples + parsed_examples,
|
|
||||||
trained=trained,
|
|
||||||
)
|
|
||||||
|
|
@ -2,31 +2,29 @@
|
|||||||
|
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
||||||
from knowledge_base import KnowledgeBase
|
|
||||||
|
|
||||||
|
def make_template(knowledge_base, text, parsed):
|
||||||
def make_template(knowledge_base: KnowledgeBase, text, parsed):
|
|
||||||
tokens = text.split()
|
tokens = text.split()
|
||||||
|
matcher = list(tokens)
|
||||||
template = list(parsed)
|
template = list(parsed)
|
||||||
for i in range(len(tokens)):
|
for i in range(len(matcher)):
|
||||||
word = tokens[i]
|
word = matcher[i]
|
||||||
if word in template:
|
if word in template:
|
||||||
print(word, i, template)
|
|
||||||
template[template.index(word)] = i
|
template[template.index(word)] = i
|
||||||
print(knowledge_base)
|
matcher[i] = {
|
||||||
tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
|
'groups': set(knowledge_base.knowledge[word]['groups'])
|
||||||
return tokens, template
|
}
|
||||||
|
return tokens, matcher, template
|
||||||
|
|
||||||
|
|
||||||
def integrate_language(knowledge_base: KnowledgeBase, example):
|
def integrate_language(knowledge_base, example):
|
||||||
text = example["text"].lower()
|
text = example["text"].lower()
|
||||||
parsed = example["parsed"]
|
parsed = example["parsed"]
|
||||||
matcher, result = make_template(knowledge_base, text, parsed)
|
tokens, matcher, result = make_template(knowledge_base, text, parsed)
|
||||||
print(text)
|
print(text)
|
||||||
print(parsed)
|
print(parsed)
|
||||||
print()
|
print()
|
||||||
return matcher, result
|
return tokens, matcher, result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_matching(sample, other):
|
def get_matching(sample, other):
|
||||||
@ -67,3 +65,15 @@ def reprocess_language_knowledge(knowledge_base, examples):
|
|||||||
print()
|
print()
|
||||||
return pattern_examples
|
return pattern_examples
|
||||||
|
|
||||||
|
|
||||||
|
def get_fit(knowledge, row):
    """Find the first trained sample whose literal tokens match ``row``.

    Args:
        knowledge: Object exposing a ``trained`` iterable of
            ``(sample, ast)`` pairs.
        row: Sequence of tokens to match.

    Returns:
        The first ``(sample, ast)`` pair whose sample has the same length
        as ``row`` and whose ``str`` entries all equal the token at the
        same position; ``None`` when no sample qualifies.
    """
    for sample, ast in knowledge.trained:
        if len(sample) != len(row):
            continue

        # Only string entries are compared literally; entries of any other
        # type accept whatever token sits at that position.
        if all(not isinstance(expected, str) or expected == token
               for expected, token in zip(sample, row)):
            return sample, ast
    return None
|
||||||
|
1
naive-nlu/requirements.txt
Normal file
1
naive-nlu/requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
jsondiff
|
@ -1,4 +1,6 @@
|
|||||||
import nlu
|
import json
|
||||||
|
|
||||||
|
from knowledge_base import KnowledgeBase
|
||||||
|
|
||||||
examples = [
|
examples = [
|
||||||
{
|
{
|
||||||
@ -29,59 +31,68 @@ examples = [
|
|||||||
|
|
||||||
base_knowledge = {
|
base_knowledge = {
|
||||||
'icecream': {
|
'icecream': {
|
||||||
"groups": ['noun', 'object', 'comestible', 'sweet'],
|
"groups": set(['noun', 'object', 'comestible', 'sweet']),
|
||||||
},
|
},
|
||||||
'lava': {
|
'lava': {
|
||||||
"groups": ['noun', 'object'],
|
"groups": set(['noun', 'object']),
|
||||||
},
|
},
|
||||||
'earth': {
|
'earth': {
|
||||||
"groups": ['noun', 'object', 'planet'],
|
"groups": set(['noun', 'object', 'planet']),
|
||||||
},
|
},
|
||||||
'green': {
|
'green': {
|
||||||
"groups": ['noun', 'color', 'concept'],
|
"groups": set(['noun', 'color', 'concept']),
|
||||||
},
|
},
|
||||||
'plane': {
|
'plane': {
|
||||||
"groups": ['noun', 'object', 'vehicle', 'fast'],
|
"groups": set(['noun', 'object', 'vehicle', 'fast']),
|
||||||
},
|
},
|
||||||
'car': {
|
'car': {
|
||||||
"groups": ['noun', 'object', 'vehicle', 'slow-ish'],
|
"groups": set(['noun', 'object', 'vehicle', 'slow-ish']),
|
||||||
},
|
},
|
||||||
'wale': {
|
'wale': {
|
||||||
"groups": ['noun', 'object', 'living-being']
|
"groups": set(['noun', 'object', 'living-being']),
|
||||||
},
|
},
|
||||||
'cold': {
|
'cold': {
|
||||||
"groups": ['property', 'temperature'],
|
"groups": set(['property', 'temperature']),
|
||||||
|
"as_property": "temperature",
|
||||||
},
|
},
|
||||||
'dangerous': {
|
'dangerous': {
|
||||||
"groups": ['property'],
|
"groups": set(['property']),
|
||||||
|
"as_property": "safety",
|
||||||
},
|
},
|
||||||
'planet': {
|
'planet': {
|
||||||
"groups": ['noun', 'group'],
|
"groups": set(['noun', 'group']),
|
||||||
},
|
},
|
||||||
'color': {
|
'color': {
|
||||||
"groups": ['property', 'group'],
|
"groups": set(['property', 'group']),
|
||||||
},
|
},
|
||||||
'fly': {
|
'fly': {
|
||||||
"groups": ['verb'],
|
"groups": set(['verb']),
|
||||||
},
|
},
|
||||||
'swim': {
|
'swim': {
|
||||||
"groups": ['verb'],
|
"groups": set(['verb']),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
knowledge = nlu.KnowledgeBase(
|
knowledge = KnowledgeBase(
|
||||||
examples=[],
|
knowledge=base_knowledge,
|
||||||
trained=[],
|
|
||||||
knowledge=base_knowledge
|
|
||||||
)
|
)
|
||||||
|
|
||||||
knowledge = nlu.train(knowledge, examples)
|
differences = knowledge.train(examples)
|
||||||
|
print("----")
|
||||||
|
print(differences())
|
||||||
|
print("----")
|
||||||
|
|
||||||
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
|
for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
|
||||||
row = test['text'].lower().split()
|
row = test['text'].lower().split()
|
||||||
fit = nlu.get_fit(knowledge, row)
|
result, differences = knowledge.process(row)
|
||||||
print(test['text'], fit)
|
|
||||||
|
print("result:", result)
|
||||||
|
print(differences())
|
||||||
|
print()
|
||||||
|
print('-----')
|
||||||
|
print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
Reference in New Issue
Block a user