diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/knowledge_base.py
new file mode 100644
index 0000000..c139037
--- /dev/null
+++ b/naive-nlu/knowledge_base.py
@@ -0,0 +1,8 @@
+import collections
+
+KnowledgeBase = collections.namedtuple('KnowledgeBase',
+                                       [
+                                           'examples',  # Language examples
+                                           'knowledge',  # Knowledge about the world
+                                           'trained',
+                                       ])
diff --git a/naive-nlu/knowledge_evaluation.py b/naive-nlu/knowledge_evaluation.py
new file mode 100644
index 0000000..fefb046
--- /dev/null
+++ b/naive-nlu/knowledge_evaluation.py
@@ -0,0 +1,33 @@
+from knowledge_base import KnowledgeBase
+
+
+def property_for_value(knowledge_base: KnowledgeBase, value):
+    return knowledge_base[value]['as_property']
+
+
+def exists_property_with_value(knowledge_base: KnowledgeBase, subj, value):
+
+    knowledge_base[subj][property_for_value(knowledge_base, value)] = value
+
+
+def pertenence_to_group(knowledge_base: KnowledgeBase, subj, group):
+    knowledge_base[subj]["group"] = group
+
+
+def has_capacity(knowledge_base: KnowledgeBase, subj, capacity):
+    if "capacities" not in knowledge_base[subj]:
+        knowledge_base[subj]["capacities"] = []
+    knowledge_base[subj]["capacities"].append(capacity)
+
+
+knowledge_ingestion = {
+    "exists-property-with-value": exists_property_with_value,
+    "pertenence-to-group": pertenence_to_group,
+    "has-capacity": has_capacity,
+}
+
+
+def integrate_information(knowledge_base: KnowledgeBase, example):
+    method = example['parsed'][0]
+    args = example['parsed'][1:]
+    knowledge_ingestion[method](knowledge_base, *args)
diff --git a/naive-nlu/nlu.py b/naive-nlu/nlu.py
index 4e2f208..8206907 100644
--- a/naive-nlu/nlu.py
+++ b/naive-nlu/nlu.py
@@ -1,130 +1,6 @@
-import collections
-from functools import reduce
-
-
-# # # # # # # # # # Base representation
-
-KnowledgeBase = collections.namedtuple('KnowledgeBase',
-                                       [
-                                           'examples',  # Language examples
-                                           'knowledge',  # Knowledge about the world
-                                           'trained',
-                                       ])
-
-# # # # # # # # # # Interpretation
-
-
-def property_for_value(knowledge_base, value):
-    return knowledge_base[value]['as_property']
-
-
-def exists_property_with_value(knowledge_base, subj, value):
-
-    knowledge_base[subj][property_for_value(knowledge_base, value)] = value
-
-
-def pertenence_to_group(knowledge_base, subj, group):
-    knowledge_base[subj]["group"] = group
-
-
-def has_capacity(knowledge_base, subj, capacity):
-    if "capacities" not in knowledge_base[subj]:
-        knowledge_base[subj]["capacities"] = []
-    knowledge_base[subj]["capacities"].append(capacity)
-
-
-knowledge_ingestion = {
-    "exists-property-with-value": exists_property_with_value,
-    "pertenence-to-group": pertenence_to_group,
-    "has-capacity": has_capacity,
-}
-
-
-def integrate_information(knowledge_base, example):
-    method = example['parsed'][0]
-    args = example['parsed'][1:]
-    knowledge_ingestion[method](knowledge_base, *args)
-
-# # # # # # # # # # Parsing
-
-
-def make_template(knowledge_base: KnowledgeBase, text, parsed):
-    tokens = text.split()
-    template = list(parsed)
-    for i in range(len(tokens)):
-        word = tokens[i]
-        if word in template:
-            print(word, i, template)
-            template[template.index(word)] = i
-        print(knowledge_base)
-        tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
-    return tokens, template
-
-
-def integrate_language(knowledge_base: KnowledgeBase, example):
-    text = example["text"].lower()
-    parsed = example["parsed"]
-    matcher, result = make_template(knowledge_base, text, parsed)
-    print(text)
-    print(parsed)
-    print()
-    return matcher, result
-
-
-def train(knowledge_base: KnowledgeBase, examples):
-
-    # Parse everything
-    parsed_examples = []
-    for example in examples:
-        parsed_examples.append(integrate_language(knowledge_base, example))
-
-    # Reduce values
-    trained = reprocess_knowledge(knowledge_base, parsed_examples)
-
-    return KnowledgeBase(
-        knowledge=knowledge_base.knowledge,
-        examples=knowledge_base.examples + parsed_examples,
-        trained=trained,
-    )
-
-
-def reprocess_knowledge(knowledge_base, examples):
-    examples = knowledge_base.examples + examples
-
-    print('\n'.join(map(str, knowledge_base.examples)))
-    print("--")
-
-    pattern_examples = []
-    for i, sample in enumerate(examples):
-        other = examples[:i] + examples[i + 1:]
-        print(sample)
-        match = get_matching(sample, other)
-        print("->", match)
-        if len(match) > 0:
-            sample = (match, sample[1],)
-            pattern_examples.append(sample)
-        print()
-    return pattern_examples
-
-
-def get_matching(sample, other):
-    l = len(sample[0])
-    other = list(filter(lambda x: len(x[0]) == l, other))
-    for i in range(l):
-        if len(other) == 0:
-            return []
-
-        if not isinstance(sample[0][i], str):
-            other = list(filter(lambda x: not isinstance(x[0][i], str) and
-                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
-                                other))
-
-    return [sample[0][x] if isinstance(sample[0][x], str)
-            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
-                                                            map(lambda y: y[0][x]['groups'],
-                                                                other))}
-            for x
-            in range(l)]
+from knowledge_base import KnowledgeBase
+import knowledge_evaluation
+import parsing
 
 
 def get_fit(knowledge: KnowledgeBase, row):
@@ -137,3 +13,21 @@ def get_fit(knowledge: KnowledgeBase, row):
         return sample, ast
     else:
         return None
+
+
+def train(knowledge_base: KnowledgeBase, examples):
+
+    # Parse everything
+    parsed_examples = []
+    for example in examples:
+        parsed_examples.append(parsing.integrate_language(knowledge_base, example))
+
+    # Reduce values
+    trained = parsing.reprocess_language_knowledge(knowledge_base, parsed_examples)
+
+    return KnowledgeBase(
+        knowledge=knowledge_base.knowledge,
+        examples=knowledge_base.examples + parsed_examples,
+        trained=trained,
+    )
+
diff --git a/naive-nlu/parsing.py b/naive-nlu/parsing.py
new file mode 100644
index 0000000..de47fe6
--- /dev/null
+++ b/naive-nlu/parsing.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+from functools import reduce
+
+from knowledge_base import KnowledgeBase
+
+
+def make_template(knowledge_base: KnowledgeBase, text, parsed):
+    tokens = text.split()
+    template = list(parsed)
+    for i in range(len(tokens)):
+        word = tokens[i]
+        if word in template:
+            print(word, i, template)
+            template[template.index(word)] = i
+        print(knowledge_base)
+        tokens[i] = {'groups': set(knowledge_base.knowledge[word]['groups'])}
+    return tokens, template
+
+
+def integrate_language(knowledge_base: KnowledgeBase, example):
+    text = example["text"].lower()
+    parsed = example["parsed"]
+    matcher, result = make_template(knowledge_base, text, parsed)
+    print(text)
+    print(parsed)
+    print()
+    return matcher, result
+
+
+
+def get_matching(sample, other):
+    l = len(sample[0])
+    other = list(filter(lambda x: len(x[0]) == l, other))
+    for i in range(l):
+        if len(other) == 0:
+            return []
+
+        if not isinstance(sample[0][i], str):
+            other = list(filter(lambda x: not isinstance(x[0][i], str) and
+                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
+                                other))
+
+    return [sample[0][x] if isinstance(sample[0][x], str)
+            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
+                                                            map(lambda y: y[0][x]['groups'],
+                                                                other))}
+            for x
+            in range(l)]
+
+
+def reprocess_language_knowledge(knowledge_base, examples):
+    examples = knowledge_base.examples + examples
+
+    print('\n'.join(map(str, knowledge_base.examples)))
+    print("--")
+
+    pattern_examples = []
+    for i, sample in enumerate(examples):
+        other = examples[:i] + examples[i + 1:]
+        print(sample)
+        match = get_matching(sample, other)
+        print("->", match)
+        if len(match) > 0:
+            sample = (match, sample[1],)
+            pattern_examples.append(sample)
+        print()
+    return pattern_examples
+