Expand the test cases applied in the NLU approach.

This commit is contained in:
kenkeiras 2017-06-04 18:57:02 +02:00
commit 46dcf55793
14 changed files with 1122 additions and 194 deletions

15
naive-nlu/setup.py Normal file
View File

@ -0,0 +1,15 @@
from setuptools import setup
# Package metadata for the tree_nlu distribution.
setup(
    name='tree_nlu',
    version='0.1',
    description='Naïve AST based NLU.',
    author='kenkeiras',
    author_email='kenkeiras@codigoparallevar.com',
    packages=['tree_nlu'],
    scripts=[],
    include_package_data=True,
    install_requires=['jsondiff'],
    zip_safe=False,
)

View File

@ -1,151 +1,4 @@
import json from tree_nlu import test
import logging
logging.getLogger().setLevel(logging.INFO)
from knowledge_base import KnowledgeBase
from modifiable_property import is_modifiable_property
# Training sentences, each paired with the parse tree it should produce.
examples = [
    {"text": "icecream is cold",
     "parsed": ("exists-property-with-value", 'icecream', 'cold')},
    {"text": "is icecream cold?",
     "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold'))},
    {"text": "lava is dangerous",
     "parsed": ("exists-property-with-value", 'lava', 'dangerous')},
    {"text": "is lava dangerous?",
     "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous'))},
    {"text": "earth is a planet",
     "parsed": ("pertenence-to-group", 'earth', 'planet')},
    {"text": "io is a moon",
     "parsed": ("pertenence-to-group", 'io', 'moon')},
    {"text": "is earth a moon?",
     "parsed": ("question", ("pertenence-to-group", 'earth', 'moon'))},
    {"text": "Green is a color",
     "parsed": ("pertenence-to-group", 'green', 'color')},
    {"text": "a plane can fly",
     "parsed": ("has-capacity", 'plane', 'fly')},
    {"text": "a wale can swim",
     "parsed": ("has-capacity", 'wale', 'swim')},
]
# Seed knowledge: every known word maps to the groups it belongs to, and
# property words may also name the property they describe ("as_property").
base_knowledge = {
    'icecream': {"groups": {'noun', 'object', 'comestible', 'sweet'}},
    'lava': {"groups": {'noun', 'object'}},
    'earth': {"groups": {'noun', 'object', 'planet'}},
    'io': {"groups": {'noun', 'object'}},
    'green': {"groups": {'noun', 'color', 'concept'}},
    'plane': {"groups": {'noun', 'object', 'vehicle', 'fast'}},
    'car': {"groups": {'noun', 'object', 'vehicle', 'slow-ish'}},
    'wale': {"groups": {'noun', 'object', 'living-being'}},
    'cold': {
        "groups": {'property', 'temperature'},
        "as_property": "temperature",
    },
    'dangerous': {
        "groups": {'property'},
        "as_property": "safety",
    },
    'planet': {"groups": {'noun', 'group'}},
    'moon': {"groups": {'noun', 'group'}},
    'color': {"groups": {'property', 'group'}},
    'fly': {"groups": {'verb'}},
    'swim': {"groups": {'verb'}},
}
def test_assumption(expectedResponse, knowledge, query):
    """Process ``query['text']`` and check the answer against the expectation.

    Args:
        expectedResponse: value the processed query must evaluate to.
        knowledge: object whose ``process(text)`` returns a 3-tuple; only the
            first element (the result) is checked here.
        query: mapping holding the sentence under the ``'text'`` key.

    Raises:
        AssertionError: if the obtained result differs from the expectation.
    """
    logging.info("Query: {}".format(query['text']))
    logging.info("Expected: {}".format(expectedResponse))

    result, abstract_tree, diff = knowledge.process(query['text'])
    # A modifiable property has to be read through its getter.
    end_result = result.getter() if is_modifiable_property(result) else result

    # Red (31) on mismatch, green (32) on match.
    logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))

    # Raise explicitly: a bare `assert` is removed under `python -O` and
    # would carry no message for whoever reads the failure.
    if end_result != expectedResponse:
        raise AssertionError('{} is not {}'.format(end_result, expectedResponse))
def main():
    """Train a KnowledgeBase on ``examples`` and verify a handful of queries.

    Raises whatever ``test_assumption`` raises when an answer is wrong.
    """
    knowledge = KnowledgeBase(
        knowledge=base_knowledge,
    )

    differences = knowledge.train(examples)

    logging.info("----")
    logging.info(differences())
    logging.info("----")

    test_assumption(True, knowledge, {'text': 'earth is a planet'})
    test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
        row = test['text']
        result, inferred_tree, differences = knowledge.process(row)

        # The message needs a placeholder for the argument; without one the
        # logging module fails to format the record when it is emitted.
        logging.info("result: %s", result)
        logging.info(differences())
        logging.info("---")
    logging.info('-----')
    logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
    logging.info('-----')

    queryTrue = {
        "text": "is io a moon?",
        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
    }
    queryFalse = {
        "text": "is io a planet?",
        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
    }

    test_assumption(False, knowledge, queryFalse)
    test_assumption(True, knowledge, queryTrue)
if __name__ == '__main__': if __name__ == '__main__':
main() test.main()

View File

View File

@ -1,5 +1,5 @@
import sys import sys
import parameters from . import parameters
def show_depth(depth: int, zoom: int=2): def show_depth(depth: int, zoom: int=2):
offset = int((parameters.MAX_RECURSIONS - depth) / (2 / zoom)) offset = int((parameters.MAX_RECURSIONS - depth) / (2 / zoom))

View File

@ -1,9 +1,10 @@
import copy import copy
import logging import logging
import parsing
import knowledge_evaluation from . import parsing
from modifiable_property import is_modifiable_property from . import knowledge_evaluation
from .modifiable_property import is_modifiable_property
def diff_knowledge(before, after): def diff_knowledge(before, after):
@ -23,6 +24,13 @@ class KnowledgeBase(object):
# Parse everything # Parse everything
parsed_examples = [] parsed_examples = []
for example in examples: for example in examples:
# If there's parsed data, leverage it ASAP
if 'parsed' in example:
result = knowledge_evaluation.integrate_information(self.knowledge, {
"parsed": example['parsed'],
})
self.act_upon(result)
logging.info("\x1b[7;32m> {} \x1b[0m".format(example)) logging.info("\x1b[7;32m> {} \x1b[0m".format(example))
tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) tokens, decomposition, inferred_tree = parsing.integrate_language(self, example)
logging.info(tokens) logging.info(tokens)
@ -48,6 +56,7 @@ class KnowledgeBase(object):
def process(self, row): def process(self, row):
row = row.lower()
knowledge_before = copy.deepcopy(self.knowledge) knowledge_before = copy.deepcopy(self.knowledge)
logging.info("\x1b[7;32m> {} \x1b[0m".format(row)) logging.info("\x1b[7;32m> {} \x1b[0m".format(row))
tokens = parsing.to_tokens(row) tokens = parsing.to_tokens(row)

View File

@ -1,4 +1,4 @@
from modifiable_property import ( from .modifiable_property import (
ModifiableProperty, ModifiableProperty,
ModifiablePropertyWithAst, ModifiablePropertyWithAst,
is_modifiable_property, is_modifiable_property,
@ -41,17 +41,43 @@ def get_subquery_type(knowledge_base, atom):
def property_for_value(knowledge_base, value):
    """Return the property name (or candidate names) that ``value`` describes.

    The entry for ``value`` is created if missing and is always tagged with
    the 'property' group as a side effect.

    Returns:
        The entry's 'as_property' value when present; otherwise the entry's
        set of groups (``{'property'}`` for a newly created entry).
    """
    if value not in knowledge_base:
        # Consider that any property is... a property
        knowledge_base[value] = {'groups': {'property'}}
        return {'property'}

    entry = knowledge_base[value]

    # Annotate the property as property
    tagged = entry.get('groups', {'property'})
    tagged.add('property')
    entry['groups'] = tagged

    # And find the property "name"
    if 'as_property' in entry:
        return entry['as_property']
    return entry.get('groups', {'property'})
def modifiable_property_from_property(prop, path, value): def modifiable_property_from_property(prop, path, value):
def getter(): def getter():
nonlocal prop, path, value nonlocal prop, path, value
return (path in prop) and prop[path] == value if isinstance(path, set):
# If the property is from a set, it's true if any possible
# path has a element as true
return any(map(lambda possible_path: ((possible_path in prop)
and
(prop[possible_path] == value)),
path))
else:
return (path in prop) and prop[path] == value
def setter(): def setter():
nonlocal prop, path, value nonlocal prop, path, value
prop[path] = value if isinstance(path, set):
for possible_path in path:
prop[possible_path] = value
else:
prop[path] = value
return ModifiableProperty( return ModifiableProperty(
getter=getter, getter=getter,
@ -87,22 +113,42 @@ def modifiable_element_for_existance_in_set(container, set_name, element):
setter=setter, setter=setter,
) )
def modifiable_element_for_existance_in_group(container, element, backlink, set_name='groups'):
    """Build a ModifiableProperty for "``element`` is in ``container[set_name]``".

    The getter reports current membership; the setter adds ``element`` to
    ``container[set_name]`` and also records ``set_name`` in
    ``backlink['groups']``.
    """
    def is_member():
        return (set_name in container) and (element in container[set_name])

    def add_member():
        # NOTE(review): this stores the set's *name* in the backlink's groups;
        # confirm that is the intended marker.
        backlink['groups'].add(set_name)
        return container[set_name].add(element)

    return ModifiableProperty(
        getter=is_member,
        setter=add_member,
    )
def pertenence_to_group(knowledge_base, elements, subj, group):
    """Build the modifiable property for "``subj`` belongs to ``group``".

    Both names are resolved first, and both get a knowledge-base entry with
    a "groups" set if they lack one.
    """
    subj = resolve(knowledge_base, elements, subj)
    group = resolve(knowledge_base, elements, group)

    # Subject first, then group: same creation order as a step-by-step check.
    for name in (subj, group):
        entry = knowledge_base.setdefault(name, {'groups': set()})
        entry.setdefault("groups", set())

    return modifiable_element_for_existance_in_group(
        container=knowledge_base[subj],
        element=group,
        backlink=knowledge_base[group],
    )
def has_capacity(knowledge_base, elements, subj, capacity): def has_capacity(knowledge_base, elements, subj, capacity):
subj = resolve(knowledge_base, elements, subj) subj = resolve(knowledge_base, elements, subj)
@ -128,12 +174,48 @@ def question(knowledge_base, elements, subj):
return subj.getter() return subj.getter()
return subj return subj
def implies(knowledge_base, elements, precedent, consequent):
    """Build the modifiable property for "``precedent`` implies ``consequent``".

    Ensures the precedent has a knowledge-base entry with an "implications"
    set, then delegates to ``modifiable_element_for_existance_in_set``.
    """
    precedent = resolve(knowledge_base, elements, precedent)
    consequent = resolve(knowledge_base, elements, consequent)

    entry = knowledge_base.setdefault(precedent, {'groups': set()})
    entry.setdefault("implications", set())

    return modifiable_element_for_existance_in_set(
        container=entry,
        set_name="implications",
        element=consequent
    )
def property_has_value(knowledge_base, elements, subj, prop, value):
    """Build the modifiable property for "``subj``'s ``prop`` includes ``value``".

    Ensures the subject has a knowledge-base entry holding a set under
    ``prop``, then delegates to ``modifiable_element_for_existance_in_set``.
    """
    subj = resolve(knowledge_base, elements, subj)
    prop = resolve(knowledge_base, elements, prop)
    value = resolve(knowledge_base, elements, value)

    entry = knowledge_base.setdefault(subj, {'groups': set()})
    entry.setdefault(prop, set())

    return modifiable_element_for_existance_in_set(
        container=entry,
        set_name=prop,
        element=value
    )
# Maps the head symbol of a parsed tree to the function that handles it.
knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
    "question": question,
    "implies": implies,
    "property-has-value": property_has_value,
}

View File

@ -1,16 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
import knowledge_evaluation from . import knowledge_evaluation
import depth_meter from . import depth_meter
import logging import logging
import re import re
import copy import copy
from functools import reduce from functools import reduce
from typing import List from typing import List, Dict
from modifiable_property import ModifiableProperty from .modifiable_property import ModifiableProperty
import parameters from . import parameters
# TODO: more flexible tokenization # TODO: more flexible tokenization
def to_tokens(text): def to_tokens(text):
@ -88,13 +88,15 @@ def integrate_language(knowledge_base, example):
for position, atom in lower_levels: for position, atom in lower_levels:
logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom)) logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom))
similar = get_similar_tree(knowledge_base, atom) similar = get_similar_tree(knowledge_base, atom, tokens)
remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
_, matcher, result = make_template(knowledge_base, tokens, atom) _, matcher, result = make_template(knowledge_base, tokens, atom)
logging.debug("Tx: {}".format(tokens)) logging.debug("Tx: {}".format(tokens))
logging.debug("Mx: {}".format(matcher)) logging.debug("Mx: {}".format(matcher))
logging.debug("Rx: {}".format(result)) logging.debug("Rx: {}".format(result))
logging.debug("Remix: {}".format(remix)) logging.debug("Remix: {}".format(remix))
logging.debug("Sx: {}".format(start_bounds))
logging.debug("Ex: {}".format(end_bounds))
after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
@ -127,16 +129,22 @@ def integrate_language(knowledge_base, example):
def apply_remix(tokens, remix):
    """Rebuild a token list by following ``remix``.

    Integer entries pick ``tokens[entry]``; string entries are inserted
    verbatim.

    Returns:
        The rebuilt list, or None when an integer entry is past the end of
        ``tokens``.
    """
    output = []
    for step in remix:
        if not isinstance(step, int):
            # Anything that is not an index must be a literal token.
            assert isinstance(step, str)
            output.append(step)
            continue

        if step >= len(tokens):
            return None
        output.append(tokens[step])
    return output
def build_remix_matrix(knowledge_base, tokens, atom, similar): def build_remix_matrix(knowledge_base, tokens, atom, similar):
tokens = list(tokens) tokens = list(tokens)
tokens, matcher, result = make_template(knowledge_base, tokens, atom) tokens, matcher, result = make_template(knowledge_base, tokens, atom)
similar_matcher, similar_result, similar_result_resolved, _ = similar similar_matcher, similar_result, similar_result_resolved, _, _ = similar
start_bounds, end_bounds = find_bounds(matcher, similar_matcher) start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)
for i, element in (end_bounds + start_bounds[::-1]): for i, element in (end_bounds + start_bounds[::-1]):
matcher.pop(i) matcher.pop(i)
@ -154,13 +162,14 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher):
for element in matcher: for element in matcher:
logging.debug("- {}".format(element)) logging.debug("- {}".format(element))
logging.debug("+ {}".format(similar_matcher)) logging.debug("+ {}".format(similar_matcher))
assert(element in similar_matcher or isinstance(element, dict)) if element in similar_matcher or isinstance(element, dict):
if isinstance(element, dict):
if isinstance(element, dict): indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
indexes = all_matching_indexes(knowledge_base, similar_matcher, element) else:
indexes = all_indexes(similar_matcher, element)
matrix.append(indexes)
else: else:
indexes = all_indexes(similar_matcher, element) matrix.append([element])
matrix.append(indexes)
# TODO: do some scoring to find the most "interesting combination" # TODO: do some scoring to find the most "interesting combination"
return [list(x) for x in list(zip(*matrix))] return [list(x) for x in list(zip(*matrix))]
@ -190,13 +199,21 @@ def all_matching_indexes(knowledge_base, collection, element):
instance = knowledge_base.knowledge[instance]["groups"] instance = knowledge_base.knowledge[instance]["groups"]
intersection = set(instance) & set(element) intersection = set(instance) & set(element)
if len(intersection) > 0: if (len(intersection) > 0 or (0 == len(instance) == len(element))):
indexes.append((i, intersection)) indexes.append((i, intersection))
return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)] return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
def element_matches_groups(knowledge, element: Dict, groups):
    """Tell whether ``element`` shares at least one group with ``groups``.

    Args:
        knowledge: mapping from a word to its entry (a dict that may hold a
            "groups" set).
        element: matcher element carrying a 'groups' set.
        groups: either a word to look up in ``knowledge`` or a dict with its
            own "groups" set.

    Returns:
        True when the two group sets intersect; False otherwise, including
        for unknown words and for values that are neither str nor dict.
    """
    if isinstance(groups, str) and groups in knowledge:
        # Look the word up by its name; the element itself is a dict and
        # cannot be used as a key.
        candidate = knowledge[groups].get("groups", set())
    elif isinstance(groups, dict):
        # Compare against the other side's groups, not the element's own.
        candidate = groups.get("groups", set())
    else:
        return False

    return len(candidate & element['groups']) > 0
def find_bounds(knowledge, matcher, similar_matcher):
start_bounds = [] start_bounds = []
for i, element in enumerate(matcher): for i, element in enumerate(matcher):
if element in similar_matcher: if element in similar_matcher:
@ -206,7 +223,15 @@ def find_bounds(matcher, similar_matcher):
end_bounds = [] end_bounds = []
for i, element in enumerate(matcher[::-1]): for i, element in enumerate(matcher[::-1]):
if element in similar_matcher: in_similar = False
if isinstance(element, str):
in_similar = element in similar_matcher
elif isinstance(element, dict):
in_similar = any(map(lambda groups: element_matches_groups(knowledge.knowledge,
element, groups),
similar_matcher))
if in_similar:
break break
else: else:
end_bounds.append((len(matcher) - (i + 1), element)) end_bounds.append((len(matcher) - (i + 1), element))
@ -214,7 +239,7 @@ def find_bounds(matcher, similar_matcher):
return start_bounds, end_bounds return start_bounds, end_bounds
def get_similar_tree(knowledge_base, atom): def get_similar_tree(knowledge_base, atom, tokens):
possibilities = [] possibilities = []
# Find matching possibilities # Find matching possibilities
@ -238,12 +263,17 @@ def get_similar_tree(knowledge_base, atom):
raw)) raw))
# TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element
score = sum([resolved[i] == atom[i] atom_score = sum([resolved[i] == atom[i]
for i for i
in range(min(len(resolved), in range(min(len(resolved),
len(atom)))]) len(atom)))])
sorted_possibilities.append((raw, possibility, resolved, score)) token_score = sum([similar_token in tokens
sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True) for similar_token
in raw])
sorted_possibilities.append((raw, possibility, resolved, atom_score, token_score))
sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3] * 100 + p[4], reverse=True)
if len(sorted_possibilities) < 1: if len(sorted_possibilities) < 1:
return None return None
@ -268,14 +298,23 @@ def get_matching(sample, other):
x[0][i][0] == sample[0][i][0], x[0][i][0] == sample[0][i][0],
other)) other))
return [sample[0][x] if isinstance(sample[0][x], str) matching = []
else for x in range(l): # Generate the combination of this and other(s) matcher
sample[0][x] if isinstance(sample[0][x], tuple) first_sample_data = sample[0][x]
else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b, if isinstance(first_sample_data, str):
map(lambda y: y[0][x]['groups'], matching.append(first_sample_data)
other))} elif isinstance(first_sample_data, tuple):
for x matching.append(first_sample_data)
in range(l)] else:
this_groups = sample[0][x]['groups']
if len(other) > 0:
other_groups = reduce(lambda a, b: a & b,
map(lambda y: y[0][x]['groups'],
other))
this_groups = this_groups & other_groups
matching.append({'groups': this_groups})
return matching
def reprocess_language_knowledge(knowledge_base, examples): def reprocess_language_knowledge(knowledge_base, examples):
@ -294,8 +333,16 @@ def reprocess_language_knowledge(knowledge_base, examples):
def reverse_remix(tree_section, remix):
    """Reorder the head of ``tree_section`` according to ``remix``.

    Integer entries select (a deep copy of) the element at that position,
    shifted by the number of string entries seen so far; string entries only
    increase that shift. Elements past ``len(remix)`` are appended unchanged.

    Returns:
        The reordered list, or None when an entry points past the end of
        ``tree_section``.
    """
    result_section = []
    offset = 0
    for origin in remix:
        if isinstance(origin, int):
            # Guard the index that is actually read (shift included), so an
            # out-of-range entry yields None instead of an IndexError.
            index = origin + offset
            if index >= len(tree_section):
                return None

            result_section.append(copy.deepcopy(tree_section[index]))
        else:
            assert(isinstance(origin, str))
            offset += 1

    return result_section + tree_section[len(remix):]
@ -332,6 +379,9 @@ def resolve_fit(knowledge, fit, remaining_recursions):
else: else:
((result_type, remixer), tokens) = element ((result_type, remixer), tokens) = element
remixed_tokens = reverse_remix(tokens, remixer) remixed_tokens = reverse_remix(tokens, remixer)
if remixed_tokens is None:
return None
minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1) minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
if minifit is None: if minifit is None:
return None return None

View File

@ -0,0 +1,34 @@
import traceback
import logging
from .tests import basic
from .tests import gac_100
logging.getLogger().setLevel(logging.ERROR)
# (display name, module) pairs; main() calls each module's main() in order.
tests = (
("basic", basic),
("gac 100", gac_100),
)
def main():
    """Run every registered test module and print one status line per module.

    A module passes when its ``main()`` returns without raising. Assertion
    failures and other exceptions are reported separately, and the process
    exits with status 1 if any module failed.
    """
    # Local import: `exit()` is an interactive helper injected by the `site`
    # module and is not guaranteed to exist; `sys.exit` always is.
    import sys

    failed = False
    for test_name, test_module in tests:
        try:
            test_module.main()
            print(" \x1b[1;32m✓\x1b[0m {}".format(test_name))
        except AssertionError as ae:
            # Only show a detail line when the assertion carried a message.
            detail = ('\n [Assertion] {}'.format(ae.args[0])) if len(ae.args) > 0 else ''
            print(" \x1b[1;31m✗\x1b[0m {}{}".format(test_name, detail))
            failed = True

        except Exception as e:
            # Top-level boundary: report the crash and keep running the rest.
            print(" \x1b[1;7;31m!\x1b[0m {}\n [Exception] {}".format(test_name, e))
            failed = True
            traceback.print_exc()

    if failed:
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,151 @@
import logging
import json
from ..knowledge_base import KnowledgeBase
from ..modifiable_property import is_modifiable_property
# Training sentences, each paired with the parse tree it should produce.
examples = [
    {"text": "icecream is cold",
     "parsed": ("exists-property-with-value", 'icecream', 'cold')},
    {"text": "is icecream cold?",
     "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold'))},
    {"text": "lava is dangerous",
     "parsed": ("exists-property-with-value", 'lava', 'dangerous')},
    {"text": "is lava dangerous?",
     "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous'))},
    {"text": "earth is a planet",
     "parsed": ("pertenence-to-group", 'earth', 'planet')},
    {"text": "io is a moon",
     "parsed": ("pertenence-to-group", 'io', 'moon')},
    {"text": "is earth a moon?",
     "parsed": ("question", ("pertenence-to-group", 'earth', 'moon'))},
    {"text": "Green is a color",
     "parsed": ("pertenence-to-group", 'green', 'color')},
    {"text": "a plane can fly",
     "parsed": ("has-capacity", 'plane', 'fly')},
    {"text": "a wale can swim",
     "parsed": ("has-capacity", 'wale', 'swim')},
    # {
    #     "text": "if earth is a planet, it is big",
    #     "parsed": ("implies",
    #                ("pertenence-to-group", 'earth', 'planet'),
    #                ("exists-property-with-value", 'earth', 'big')),
    # },
]
# Seed knowledge: every known word maps to the groups it belongs to, and
# property words may also name the property they describe ("as_property").
base_knowledge = {
    'icecream': {"groups": {'noun', 'object', 'comestible', 'sweet'}},
    'lava': {"groups": {'noun', 'object'}},
    'earth': {"groups": {'noun', 'object', 'planet'}},
    'io': {"groups": {'noun', 'object'}},
    'green': {"groups": {'noun', 'color', 'concept'}},
    'plane': {"groups": {'noun', 'object', 'vehicle', 'fast'}},
    'car': {"groups": {'noun', 'object', 'vehicle', 'slow-ish'}},
    'wale': {"groups": {'noun', 'object', 'living-being'}},
    'cold': {
        "groups": {'property', 'temperature'},
        "as_property": "temperature",
    },
    'dangerous': {
        "groups": {'property'},
        "as_property": "safety",
    },
    'planet': {"groups": {'noun', 'group'}},
    'moon': {"groups": {'noun', 'group'}},
    'color': {"groups": {'property', 'group'}},
    'fly': {"groups": {'verb'}},
    'swim': {"groups": {'verb'}},
}
def test_assumption(expectedResponse, knowledge, query):
    """Process ``query['text']`` and fail if the answer is not the expected one.

    Args:
        expectedResponse: value the processed query must evaluate to.
        knowledge: object whose ``process(text)`` returns a 3-tuple; only the
            first element (the result) is checked here.
        query: mapping holding the sentence under the ``'text'`` key.

    Raises:
        AssertionError: if the obtained result differs from the expectation.
    """
    text = query['text']
    logging.info("Query: {}".format(text))
    logging.info("Expected: {}".format(expectedResponse))

    answer, _tree, _diff = knowledge.process(text)
    if is_modifiable_property(answer):
        # A modifiable property has to be read through its getter.
        answer = answer.getter()

    # Red (31) on mismatch, green (32) on match.
    color_digit = "1" if answer != expectedResponse else "2"
    logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format(color_digit, answer))

    if answer != expectedResponse:
        raise AssertionError('{} is not {}'.format(answer, expectedResponse))
def main():
    """Train a KnowledgeBase on ``examples`` and verify a handful of queries.

    Raises:
        AssertionError: propagated from ``test_assumption`` when a query does
            not produce the expected answer.
    """
    knowledge = KnowledgeBase(
        knowledge=base_knowledge,
    )

    differences = knowledge.train(examples)

    logging.info("----")
    logging.info(differences())
    logging.info("----")

    test_assumption(True, knowledge, {'text': 'earth is a planet'})
    test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
        row = test['text']
        result, inferred_tree, differences = knowledge.process(row)

        # The message needs a placeholder for the argument; without one the
        # logging module fails to format the record when it is emitted.
        logging.info("result: %s", result)
        logging.info(differences())
        logging.info("---")
    logging.info('-----')
    logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
    logging.info('-----')

    queryTrue = {
        "text": "is io a moon?",
        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
    }
    queryFalse = {
        "text": "is io a planet?",
        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
    }

    test_assumption(False, knowledge, queryFalse)
    test_assumption(True, knowledge, queryTrue)

View File

@ -0,0 +1,715 @@
from ..knowledge_base import KnowledgeBase
from ..utils.visuals import show_progbar
def _assert(args):
assert(args)
def _assert_msg(args, msg):
assert(args, msg)
examples = [
('full_example',
{
"text": "is icecream cold?",
"affirmation": "icecream is cold",
"parsed": ("question",
("exists-property-with-value", 'icecream', 'cold')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('cold' in knowledge.knowledge['icecream']['property'])
),],
}),
('full_example',
{
"text": "is earth a planet?",
"affirmation": "earth is a planet",
"parsed": ("question",
("pertenence-to-group", 'earth', 'planet')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('planet' in knowledge.knowledge['earth']['groups'])
),],
}),
('full_example',
{
"text": "Is green a color?",
"affirmation": "green is a color",
"parsed": ("question",
("pertenence-to-group", 'green', 'color')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('color' in knowledge.knowledge['green']['groups'])
),],
}),
('full_example',
{
"text": "do airplanes fly?",
"affirmation": "airplanes fly",
"parsed": ("question",
("has-capacity", 'plane', 'fly')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('fly' in knowledge.knowledge['plane']['capacities'])
),],
}),
('full_example',
{
"text": "Is it hot during the summer?",
"affirmation": "it is hot during summer",
"parsed": ("question",
("implies", 'summer', 'hot')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('hot' in knowledge.knowledge['summer']['implications'])
),],
}),
('full_example',
{
"text": "is chile in south america ?",
"affirmation": "chile is in south america",
"parsed": ("question",
("property-has-value", 'chile', 'location', 'south america')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('south america' in knowledge.knowledge['chile']['location'])
),],
}),
('full_example',
{
"text": "Was Socrates a man?",
"affirmation": "Socrates was a man",
"parsed": ("question",
("pertenence-to-group", 'socrates', 'man')),
"answer": True,
"after_execution": [(
lambda knowledge: _assert('man' in knowledge.knowledge['socrates']['groups'])
),],
}),
# {
# "text": "Computers use electricity?",
# "affirmation": "Computers use electricity?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "The dominant language in france is french?",
# "affirmation": "The dominant language in france is french?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "was abraham lincoln once president of the united states?",
# "affirmation": "was abraham lincoln once president of the united states?",
# "parsed": (),
# "answer": None,
# },
('text_example',
{
"question": "Is milk white?",
"affirmation": "milk is white",
"answer": True,
}),
# {
# "text": "do people have emotions?",
# "affirmation": "do people have emotions?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "do objects appear smaller as they move away from you?",
# "affirmation": "do objects appear smaller as they move away from you?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Does the human species have a male and female gender?",
# "affirmation": "Does the human species have a male and female gender?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is a mountain mostly made of rock?",
# "affirmation": "Is a mountain mostly made of rock?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is sun microsystems a computer company?",
# "affirmation": "is sun microsystems a computer company?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do you see with your eyes and smell with your nose?",
# "affirmation": "Do you see with your eyes and smell with your nose?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is smoking bad for your health?",
# "affirmation": "Is smoking bad for your health?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Does a dog have four legs?",
# "affirmation": "Does a dog have four legs?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do mammals have hearts?",
# "affirmation": "Do mammals have hearts?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is the Earth a planet?",
# "affirmation": "is the Earth a planet?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is water a liquid?",
# "affirmation": "Is water a liquid?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is Bugs Bunny a cartoon character?",
# "affirmation": "Is Bugs Bunny a cartoon character?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do Humans communicate by Telephone?",
# "affirmation": "Do Humans communicate by Telephone?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is beer a drink ?",
# "affirmation": "is beer a drink ?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "are there 12 months in a year?",
# "affirmation": "are there 12 months in a year?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "does the sun hurt your eyes when you look at it?",
# "affirmation": "does the sun hurt your eyes when you look at it?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do most cars have doors?",
# "affirmation": "Do most cars have doors?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is orange both a fruit and a colour?",
# "affirmation": "is orange both a fruit and a colour?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is water a necessity?",
# "affirmation": "Is water a necessity?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do CDs have better quality sound than Cassettes?",
# "affirmation": "Do CDs have better quality sound than Cassettes?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "do animals die?",
# "affirmation": "do animals die?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is the arctic cold?",
# "affirmation": "Is the arctic cold?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do people have 2 eyes?",
# "affirmation": "Do people have 2 eyes?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "does a person have a brain?",
# "affirmation": "does a person have a brain?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is the rain wet?",
# "affirmation": "Is the rain wet?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is division a mathematical operation?",
# "affirmation": "Is division a mathematical operation?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is 400 greater than 399?",
# "affirmation": "is 400 greater than 399?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is magenta a color?",
# "affirmation": "is magenta a color?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are books educational?",
# "affirmation": "Are books educational?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Was the Great Wall of China built by humans?",
# "affirmation": "Was the Great Wall of China built by humans?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are pianos musical instruments?",
# "affirmation": "Are pianos musical instruments?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Has Bill Clinton been President of the United States?",
# "affirmation": "Has Bill Clinton been President of the United States?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is a whale a mammal?",
# "affirmation": "Is a whale a mammal?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are lemons yellow?",
# "affirmation": "Are lemons yellow?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is the South Pole cold?",
# "affirmation": "Is the South Pole cold?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is Africa warm?",
# "affirmation": "Is Africa warm?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is Antarctica cold?",
# "affirmation": "Is Antarctica cold?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is rock is generally harder than wood?",
# "affirmation": "Is rock is generally harder than wood?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do dogs chase cats?",
# "affirmation": "Do dogs chase cats?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "can humans die from cold temperatures?",
# "affirmation": "can humans die from cold temperatures?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "do people enjoy conversation?",
# "affirmation": "do people enjoy conversation?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is Bill Clinton the President of the United States?",
# "affirmation": "Is Bill Clinton the President of the United States?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are books a good source of information?",
# "affirmation": "Are books a good source of information?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "are friends different than enemies?",
# "affirmation": "are friends different than enemies?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "are people alive?",
# "affirmation": "are people alive?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do triangles have 3 sides?",
# "affirmation": "Do triangles have 3 sides?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is Ice cream cold?",
# "affirmation": "Is Ice cream cold?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are all sides of a square the same length?",
# "affirmation": "Are all sides of a square the same length?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do all people eat food?",
# "affirmation": "Do all people eat food?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "do dentists repair teeth?",
# "affirmation": "do dentists repair teeth?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is America bigger than Japan?",
# "affirmation": "Is America bigger than Japan?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do all triangles have three sides?",
# "affirmation": "Do all triangles have three sides?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "A grocery store sales food?",
# "affirmation": "A grocery store sales food?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Does a sunburn cause pain?",
# "affirmation": "Does a sunburn cause pain?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is a computer an invention?",
# "affirmation": "Is a computer an invention?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "have humans visited the moon?",
# "affirmation": "have humans visited the moon?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are there people in India?",
# "affirmation": "Are there people in India?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Was Einstein a genius?",
# "affirmation": "Was Einstein a genius?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are we on the planet earth?",
# "affirmation": "Are we on the planet earth?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "do people comb their hair in the morning?",
# "affirmation": "do people comb their hair in the morning?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Does it hurt to lose a friend?",
# "affirmation": "Does it hurt to lose a friend?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are there people on the earth?",
# "affirmation": "Are there people on the earth?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Was George Washington a president of the United States of America?",
# "affirmation": "Was George Washington a president of the United States of America?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Does an ocean have salt water in it?",
# "affirmation": "Does an ocean have salt water in it?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is night darker than day?",
# "affirmation": "Is night darker than day?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Does a triangle have three sides?",
# "affirmation": "Does a triangle have three sides?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are peaches fruit?",
# "affirmation": "Are peaches fruit?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do people urinate?",
# "affirmation": "Do people urinate?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is Germany located in Europe?",
# "affirmation": "Is Germany located in Europe?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do mirrors reflect light?",
# "affirmation": "Do mirrors reflect light?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are people born naked?",
# "affirmation": "Are people born naked?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is it hot near the equator?",
# "affirmation": "Is it hot near the equator?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is paper made from trees?",
# "affirmation": "is paper made from trees?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Can a female have children?",
# "affirmation": "Can a female have children?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are people born every day?",
# "affirmation": "Are people born every day?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are shoes worn on the feet?",
# "affirmation": "Are shoes worn on the feet?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "does it get wet when it rains?",
# "affirmation": "does it get wet when it rains?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are there plants and insects in the rainforest which have no names?",
# "affirmation": "Are there plants and insects in the rainforest which have no names?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do people eat pigs?",
# "affirmation": "Do people eat pigs?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do businessmen wear ties?",
# "affirmation": "Do businessmen wear ties?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is New York in the United States?",
# "affirmation": "Is New York in the United States?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are humans more intelligent than ants?",
# "affirmation": "Are humans more intelligent than ants?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are ravens black?",
# "affirmation": "Are ravens black?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Are there rats on ships?",
# "affirmation": "Are there rats on ships?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "are lions animals?",
# "affirmation": "are lions animals?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "6 is greater than 5?",
# "affirmation": "6 is greater than 5?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Is water made of hydrogen and oxygen?",
# "affirmation": "Is water made of hydrogen and oxygen?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "is the sky blue on a clear day?",
# "affirmation": "is the sky blue on a clear day?",
# "parsed": (),
# "answer": None,
# },
# {
# "text": "Do most people work during the day?",
# "affirmation": "Do most people work during the day?",
# "parsed": (),
# "answer": None,
# },
]
# Seed vocabulary: every known token mapped to a record whose "groups" entry
# is the set of group names the token belongs to.
base_knowledge = {
    token: {"groups": set(group_names)}
    for token, group_names in (
        ('icecream', ('noun', 'object', 'comestible', 'sweet')),
        ('hot', ('property', 'temperature')),
        ('summer', ('epoch',)),
        ('planet', ('noun', 'group')),
        ('green', ('noun', 'color', 'concept')),
        ('fly', ('verb',)),
    )
}
def _assert_answer(result, expected):
    """Raise ``AssertionError`` when *result* differs from *expected*."""
    if result != expected:
        raise AssertionError('{} is not {}'.format(result, expected))


def main():
    """Run every entry of ``examples`` against a single ``KnowledgeBase``.

    One ``KnowledgeBase`` seeded with ``base_knowledge`` is shared by all the
    examples, which are handled in order according to their type tag:

    * ``'full_example'``: trains on the affirmation (its text plus element 1
      of the question's ``parsed`` value), trains on the question itself,
      processes the question text, checks the answer and finally calls each
      ``after_execution`` hook, if any, with the knowledge base.
    * ``'text_example'``: processes the affirmation and then the question as
      plain text and checks the answer.

    A progress bar is refreshed before each step and erased at the end.

    Raises:
        AssertionError: if processing a question does not produce the
            example's expected ``answer``.
        NotImplementedError: if an example carries an unknown type tag.
    """
    knowledge = KnowledgeBase(
        knowledge=base_knowledge,
    )
    total = len(examples)

    for i, (example_type, data) in enumerate(examples):
        if example_type == 'full_example':
            # The affirmation reuses the question's parse tree minus its
            # outer wrapper (element 0 of ``parsed`` is skipped).
            affirmation = {
                'text': data['affirmation'],
                'parsed': data['parsed'][1],
            }

            show_progbar(i, total, data['affirmation'])
            knowledge.train([affirmation])

            show_progbar(i, total, data['text'])
            knowledge.train([data])

            result, _, _ = knowledge.process(data['text'])
            _assert_answer(result, data['answer'])

            if "after_execution" in data:
                for hook in data["after_execution"]:
                    hook(knowledge)

        elif example_type == 'text_example':
            affirmation = data['affirmation']
            question = data['question']

            # Announce each sentence right before it is processed so the
            # message on screen matches the work in progress.
            show_progbar(i, total, affirmation)
            _, _, _ = knowledge.process(affirmation)

            show_progbar(i, total, question)
            result, _, _ = knowledge.process(question)
            _assert_answer(result, data['answer'])

        else:
            raise NotImplementedError('Example type: {}'.format(example_type))

    # Return to column 0 and erase the leftover progress bar.
    print("\r\x1b[K", end='')

View File

@ -0,0 +1,4 @@
def dumper(obj):
    """Return a list of *obj*'s items when it is a ``set``, else *obj* itself.

    Only ``set`` instances are converted; every other value (including
    ``frozenset``) is handed back unchanged.

    NOTE(review): presumably meant as a ``default=`` hook for JSON
    serialization -- confirm against the call sites.
    """
    return list(obj) if isinstance(obj, set) else obj

View File

@ -0,0 +1,15 @@
def show_progbar(done, total, msg='', total_blocks=10):
    """Draw a single-line text progress bar on stdout.

    The current terminal line is erased and redrawn, and the cursor is left
    at the start of the line so that the next call overwrites the bar.

    Args:
        done: Number of items completed so far.
        total: Total number of items. Zero or less draws an empty bar
            instead of raising ``ZeroDivisionError``.
        msg: Text shown to the right of the bar.
        total_blocks: Width of the bar, in character cells.
    """
    if total > 0:
        blocks_done = (done * total_blocks) // total
        # Clamp so an out-of-range `done` cannot stretch the bar past its
        # fixed width or produce a negative count.
        blocks_done = max(0, min(total_blocks, blocks_done))
    else:
        blocks_done = 0
    blocks_to_go = total_blocks - blocks_done

    line = ''.join((
        '\r\x1b[K',                # Go to the start of the line and erase it
        '\x1b[0m',                 # Reset the text style
        '|',                       # Opening edge of the bar
        blocks_done * '\u2588',    # Completed blocks (U+2588 FULL BLOCK)
        blocks_to_go * ' ',        # Pending blocks
        '\x1b[7m|\x1b[0m',         # Closing edge, in reverse video
        ' ',
        msg,                       # Caller-supplied message
        '\r',                      # Back to the start for the next redraw
    ))
    # No newline is emitted, so flush explicitly: a line-buffered stdout
    # would otherwise hold the bar back until some later newline.
    print(line, end='', flush=True)