diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py index c139037..e1e94b5 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/knowledge_base.py @@ -1,8 +1,59 @@ -import collections +import copy -KnowledgeBase = collections.namedtuple('KnowledgeBase', - [ - 'examples', # Language examples - 'knowledge', # Knowledge about the world - 'trained', - ]) +import parsing +import knowledge_evaluation + + +def diff_knowledge(before, after): + import jsondiff + return jsondiff.diff(before, after) + + +class KnowledgeBase(object): + def __init__(self, knowledge, examples=[], trained=[]): + self.knowledge = copy.copy(knowledge) + self.examples = copy.copy(examples) + self.trained = copy.copy(trained) + + def train(self, examples): + knowledge_before = copy.deepcopy(self.knowledge) + + # Parse everything + parsed_examples = [] + for example in examples: + tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) + print(tokens) + knowledge_evaluation.integrate_information(self.knowledge, { + "elements": tokens, + "decomposition": decomposition, + "parsed": inferred_tree, + }) + parsed_examples.append((decomposition, inferred_tree)) + + # Reduce values + trained = parsing.reprocess_language_knowledge(self, parsed_examples) + + self.examples += parsed_examples + self.trained = trained + + knowledge_after = copy.deepcopy(self.knowledge) + knowledge_diff_getter = lambda: diff_knowledge(knowledge_before, + knowledge_after) + + return knowledge_diff_getter + + + def process(self, row): + knowledge_before = copy.deepcopy(self.knowledge) + decomposition, inferred_tree = parsing.get_fit(self, row) + result = knowledge_evaluation.integrate_information(self.knowledge, + { + "elements": row, + "decomposition": decomposition, + "parsed": inferred_tree, + }) + knowledge_after = copy.deepcopy(self.knowledge) + knowledge_diff_getter = lambda: diff_knowledge(knowledge_before, + knowledge_after) + + return result, knowledge_diff_getter diff --git a/naive-nlu/knowledge_evaluation.py b/naive-nlu/knowledge_evaluation.py index fefb046..8c3acd4 100644 --- a/naive-nlu/knowledge_evaluation.py +++ b/naive-nlu/knowledge_evaluation.py @@ -1,23 +1,47 @@ -from knowledge_base import KnowledgeBase +def resolve(elements, value): + if isinstance(value, int): + return elements[value] + return value -def property_for_value(knowledge_base: KnowledgeBase, value): +def property_for_value(knowledge_base, value): + print(value) + print(knowledge_base[value]) return knowledge_base[value]['as_property'] -def exists_property_with_value(knowledge_base: KnowledgeBase, subj, value): +def exists_property_with_value(knowledge_base, elements, subj, value): + subj = resolve(elements, subj) + value = resolve(elements, value) + + if subj not in knowledge_base: + knowledge_base[subj] = {} knowledge_base[subj][property_for_value(knowledge_base, value)] = value -def pertenence_to_group(knowledge_base: KnowledgeBase, subj, group): - knowledge_base[subj]["group"] = group +def pertenence_to_group(knowledge_base, elements, subj, group): + subj = resolve(elements, subj) + group = resolve(elements, group) + + if subj not in knowledge_base: + knowledge_base[subj] = {} + + if "groups" not in knowledge_base[subj]: + knowledge_base[subj]["groups"] = set() + knowledge_base[subj]["groups"].add(group) -def has_capacity(knowledge_base: KnowledgeBase, subj, capacity): +def has_capacity(knowledge_base, elements, subj, capacity): + subj = resolve(elements, subj) + capacity = resolve(elements, capacity) + + if subj not in knowledge_base: + knowledge_base[subj] = {} + if "capacities" not in knowledge_base[subj]: - knowledge_base[subj]["capacities"] = [] - knowledge_base[subj]["capacities"].append(capacity) + knowledge_base[subj]["capacities"] = set() + knowledge_base[subj]["capacities"].add(capacity) knowledge_ingestion = { @@ -27,7 +51,9 @@ knowledge_ingestion = { } -def integrate_information(knowledge_base: KnowledgeBase, example): +def integrate_information(knowledge_base, example): method = example['parsed'][0] args = example['parsed'][1:] - knowledge_ingestion[method](knowledge_base, *args) + elements = example.get('elements', None) + + knowledge_ingestion[method](knowledge_base, elements, *args) diff --git a/naive-nlu/nlu.py b/naive-nlu/nlu.py deleted file mode 100644 index 8206907..0000000 --- a/naive-nlu/nlu.py +++ /dev/null @@ -1,33 +0,0 @@ -from knowledge_base import KnowledgeBase -import knowledge_evaluation -import parsing - - -def get_fit(knowledge: KnowledgeBase, row): - for sample, ast in knowledge.trained: - if len(sample) != len(row): - continue - - if all(map(lambda x: not isinstance(sample[x], str) or sample[x] == row[x], - range(len(sample)))): - return sample, ast - else: - return None - - -def train(knowledge_base: KnowledgeBase, examples): - - # Parse everything - parsed_examples = [] - for example in examples: - parsed_examples.append(parsing.integrate_language(knowledge_base, example)) - - # Reduce values - trained = parsing.reprocess_language_knowledge(knowledge_base, parsed_examples) - - return KnowledgeBase( - knowledge=knowledge_base.knowledge, - examples=knowledge_base.examples + parsed_examples, - trained=trained, - ) - diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py index de47fe6..0e4f5f7 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/parsing.py @@ -2,31 +2,29 @@ from functools import reduce -from knowledge_base import KnowledgeBase - -def make_template(knowledge_base: KnowledgeBase, text, parsed): +def make_template(knowledge_base, text, parsed): tokens = text.split() + matcher = list(tokens) template = list(parsed) - for i in range(len(tokens)): - word = tokens[i] + for i in range(len(matcher)): + word = matcher[i] if word in template: - print(word, i, template) template[template.index(word)] = i - print(knowledge_base) - tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])} - return tokens, template + matcher[i] = { + 'groups': set(knowledge_base.knowledge[word]['groups']) + } + return tokens, matcher, template -def integrate_language(knowledge_base: KnowledgeBase, example): +def integrate_language(knowledge_base, example): text = example["text"].lower() parsed = example["parsed"] - matcher, result = make_template(knowledge_base, text, parsed) + tokens, matcher, result = make_template(knowledge_base, text, parsed) print(text) print(parsed) print() - return matcher, result - + return tokens, matcher, result def get_matching(sample, other): @@ -67,3 +65,15 @@ def reprocess_language_knowledge(knowledge_base, examples): print() return pattern_examples + +def get_fit(knowledge, row): + for sample, ast in knowledge.trained: + if len(sample) != len(row): + continue + + if all(map(lambda x: (not isinstance(sample[x], str) + or sample[x] == row[x]), + range(len(sample)))): + return sample, ast + else: + return None diff --git a/naive-nlu/requirements.txt b/naive-nlu/requirements.txt new file mode 100644 index 0000000..9891d55 --- /dev/null +++ b/naive-nlu/requirements.txt @@ -0,0 +1 @@ +jsondiff diff --git a/naive-nlu/test.py b/naive-nlu/test.py index e44cba3..552b416 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -1,4 +1,6 @@ -import nlu +import json + +from knowledge_base import KnowledgeBase examples = [ { @@ -29,59 +31,68 @@ examples = [ base_knowledge = { 'icecream': { - "groups": ['noun', 'object', 'comestible', 'sweet'], + "groups": set(['noun', 'object', 'comestible', 'sweet']), }, 'lava': { - "groups": ['noun', 'object'], + "groups": set(['noun', 'object']), }, 'earth': { - "groups": ['noun', 'object', 'planet'], + "groups": set(['noun', 'object', 'planet']), }, 'green': { - "groups": ['noun', 'color', 'concept'], + "groups": set(['noun', 'color', 'concept']), }, 'plane': { - "groups": ['noun', 'object', 'vehicle', 'fast'], + "groups": set(['noun', 'object', 'vehicle', 'fast']), }, 'car': { - "groups": ['noun', 'object', 'vehicle', 'slow-ish'], + "groups": set(['noun', 'object', 'vehicle', 'slow-ish']), }, 'wale': { - "groups": ['noun', 'object', 'living-being'] + "groups": set(['noun', 'object', 'living-being']), }, 'cold': { - "groups": ['property', 'temperature'], + "groups": set(['property', 'temperature']), + "as_property": "temperature", }, 'dangerous': { - "groups": ['property'], + "groups": set(['property']), + "as_property": "safety", }, 'planet': { - "groups": ['noun', 'group'], + "groups": set(['noun', 'group']), }, 'color': { - "groups": ['property', 'group'], + "groups": set(['property', 'group']), }, 'fly': { - "groups": ['verb'], + "groups": set(['verb']), }, 'swim': { - "groups": ['verb'], + "groups": set(['verb']), }, } def main(): - knowledge = nlu.KnowledgeBase( - examples=[], - trained=[], - knowledge=base_knowledge + knowledge = KnowledgeBase( + knowledge=base_knowledge, ) - knowledge = nlu.train(knowledge, examples) + differences = knowledge.train(examples) + print("----") + print(differences()) + print("----") + for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: row = test['text'].lower().split() - fit = nlu.get_fit(knowledge, row) - print(test['text'], fit) + result, differences = knowledge.process(row) + + print("result:", result) + print(differences()) + print() + print('-----') + print(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) if __name__ == '__main__':