diff --git a/absorption-test.py b/absorption-test.py
new file mode 100644
index 0000000..023d742
--- /dev/null
+++ b/absorption-test.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+"""Toy experiment: absorb already-parsed sentences into a knowledge base.
+
+Each example pairs a sentence with its parsed form, a tuple of
+``(operation, *arguments)``. The operation names the handler in
+``knowledge_ingestion`` that records the fact in ``knowledge_base``; the
+resulting knowledge base is printed as JSON.
+"""
+
+import json
+import collections
+
+examples = [
+    {
+        "text": "icecream is cold",
+        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
+    },
+    {
+        "text": "earth is a planet",
+        "parsed": ("pertenence-to-group", 'earth', 'planet'),
+    },
+    {
+        "text": "Green is a color",
+        "parsed": ("pertenence-to-group", 'green', 'color'),
+    },
+    {
+        "text": "airplanes do fly",
+        "parsed": ("has-capacity", 'airplane', 'fly')
+    }
+]
+
+# Maps each subject to its {property: value} facts; created on first access.
+knowledge_base = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict()))
+
+# Property that each known value is a value of.
+property_by_value = {
+    "cold": "temperature",
+}
+
+
+def property_for_value(value):
+    """Return the name of the property that `value` is a value of.
+
+    Values missing from `property_by_value` map to "unknown", so every key
+    stored in the knowledge base stays a string and can be sorted by
+    `json.dumps(..., sort_keys=True)`.
+    """
+    return property_by_value.get(value, "unknown")
+
+
+def exists_property_with_value(subj, value):
+    """Store `value` under the property it belongs to, for subject `subj`."""
+    knowledge_base[subj][property_for_value(value)] = value
+
+
+def pertenence_to_group(subj, group):
+    """Set the group of `subj`, replacing any group stored before."""
+    knowledge_base[subj]["group"] = group
+
+
+def has_capacity(subj, capacity):
+    """Append `capacity` to the list of capacities of `subj`."""
+    knowledge_base[subj].setdefault("capacities", []).append(capacity)
+
+
+# Handler for each operation name that can appear first in a parsed tuple.
+knowledge_ingestion = {
+    "exists-property-with-value": exists_property_with_value,
+    "pertenence-to-group": pertenence_to_group,
+    "has-capacity": has_capacity,
+}
+
+
+def ingest(example):
+    """Apply the parsed form of `example` to the knowledge base.
+
+    `example['parsed']` is `(operation, *arguments)`; a `KeyError` is raised
+    when the operation has no handler in `knowledge_ingestion`.
+    """
+    method = example['parsed'][0]
+    args = example['parsed'][1:]
+    knowledge_ingestion[method](*args)
+
+
+for example in examples:
+    ingest(example)
+
+print(json.dumps(knowledge_base, indent=4, sort_keys=True))
diff --git a/parsing_test.py b/parsing_test.py
new file mode 100644
index 0000000..74f011b
--- /dev/null
+++ b/parsing_test.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+"""Toy experiment: derive sentence patterns from parsed examples.
+
+Every example sentence becomes a matcher (its tokens, with the words named
+by the parse replaced by their knowledge-base groups) and a template (the
+parse, with those words replaced by their token positions). Matchers of
+compatible examples are then generalized and tried on unseen sentences.
+"""
+
+import json
+from functools import reduce
+
+examples = [
+    {
+        "text": "icecream is cold",
+        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
+    },
+    {
+        "text": "lava is dangerous",
+        "parsed": ("exists-property-with-value", 'lava', 'dangerous')
+    },
+    {
+        "text": "earth is a planet",
+        "parsed": ("pertenence-to-group", 'earth', 'planet'),
+    },
+    {
+        "text": "Green is a color",
+        "parsed": ("pertenence-to-group", 'green', 'color'),
+    },
+    {
+        "text": "a plane can fly",
+        "parsed": ("has-capacity", 'plane', 'fly')
+    },
+    {
+        "text": "a wale can swim",
+        "parsed": ("has-capacity", 'wale', 'swim')
+    }
+]
+
+# Groups known for each word; make_template() copies them into matchers.
+knowledge_base = {
+    'icecream': {
+        "groups": ['noun', 'object', 'comestible', 'sweet'],
+    },
+    'lava': {
+        "groups": ['noun', 'object'],
+    },
+    'earth': {
+        "groups": ['noun', 'object', 'planet'],
+    },
+    'green': {
+        "groups": ['noun', 'color', 'concept'],
+    },
+    'plane': {
+        "groups": ['noun', 'object', 'vehicle', 'fast'],
+    },
+    'car': {
+        "groups": ['noun', 'object', 'vehicle', 'slow-ish'],
+    },
+    'wale': {
+        "groups": ['noun', 'object', 'living-being']
+    },
+    'cold': {
+        "groups": ['property', 'temperature'],
+    },
+    'dangerous': {
+        "groups": ['property'],
+    },
+    'planet': {
+        "groups": ['noun', 'group'],
+    },
+    'color': {
+        "groups": ['property', 'group'],
+    },
+    'fly': {
+        "groups": ['verb'],
+    },
+    'swim': {
+        "groups": ['verb'],
+    },
+}
+
+
+def make_template(text, parsed):
+    """Split `text` into a matcher and rewrite `parsed` into a template.
+
+    Each token that also appears in `parsed` is replaced in the matcher by
+    `{'groups': groups}`, the set of its knowledge-base groups, and its first
+    remaining occurrence in the template is replaced by the token's position.
+    Raises `KeyError` when such a token is missing from `knowledge_base`.
+
+    Returns the `(matcher, template)` pair, both as lists.
+    """
+    tokens = text.split()
+    template = list(parsed)
+    for i, word in enumerate(tokens):
+        if word in template:
+            print(word, i, template)
+            template[template.index(word)] = i
+            tokens[i] = {'groups': set(knowledge_base[word]['groups'])}
+    return tokens, template
+
+
+def ingest(example):
+    """Build the `(matcher, template)` pair for `example`, echoing its input.
+
+    The text is lower-cased before tokenizing, so that e.g. "Green" lines up
+    with the 'green' used in its parsed tuple.
+    """
+    text = example["text"].lower()
+    parsed = example["parsed"]
+    matcher, result = make_template(text, parsed)
+    print(text)
+    print(parsed)
+    print()
+    return matcher, result
+
+
+language_training = []
+for example in examples:
+    language_training.append(ingest(example))
+
+
+def get_matching(sample, other):
+    """Generalize `sample`'s matcher using compatible entries of `other`.
+
+    An entry is compatible when its matcher has the same length as the
+    sample's and, at every position where the sample holds a group slot, it
+    also holds a group slot sharing at least one group. Literal words are
+    not compared.
+
+    Returns the sample's matcher with each slot narrowed to the groups common
+    to the sample and all compatible entries, or `[]` when none is compatible.
+    """
+    matcher = sample[0]
+    candidates = [x for x in other if len(x[0]) == len(matcher)]
+    for i, token in enumerate(matcher):
+        if isinstance(token, str):
+            continue
+        candidates = [x for x in candidates
+                      if not isinstance(x[0][i], str)
+                      and x[0][i]['groups'] & token['groups']]
+
+    # The last filter can empty the candidate list too; without candidates
+    # there is nothing to generalize from.
+    if not candidates:
+        return []
+
+    return [token if isinstance(token, str)
+            else {'groups': reduce(lambda a, b: a & b,
+                                   (x[0][i]['groups'] for x in candidates),
+                                   token['groups'])}
+            for i, token in enumerate(matcher)]
+
+
+print('\n'.join(map(str, language_training)))
+print("--")
+pattern_examples = []
+for i, sample in enumerate(language_training):
+    other = language_training[:i] + language_training[i + 1:]
+    print(sample)
+    match = get_matching(sample, other)
+    print("->", match)
+    if len(match) > 0:
+        sample = (match, sample[1],)
+    pattern_examples.append(sample)
+    print()
+
+
+def get_fit(patterns, row):
+    """Return the first `(matcher, template)` in `patterns` that fits `row`.
+
+    A pattern fits when its matcher has as many entries as `row` has tokens
+    and every literal word in it equals the token at the same position;
+    group slots accept any token. Returns `None` when no pattern fits.
+    """
+    for sample, ast in patterns:
+        if len(sample) != len(row):
+            continue
+
+        if all(not isinstance(expected, str) or expected == word
+               for expected, word in zip(sample, row)):
+            return sample, ast
+    return None
+
+
+for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
+    row = test['text'].lower().split()
+    fit = get_fit(pattern_examples, row)
+    print(test['text'], fit)
+
+# while True:
+#     row = input('>>> ').lower().split()
+#     fit = get_fit(pattern_examples, row)
+#     if fit is None:
+#         print("No fit")
+#     else:
+#         print(fit)