From ec17fca6cfb98f4dea20bc1c03dbb0d776bf195f Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 18:56:04 +0200 Subject: [PATCH 01/27] Add unpassed test. --- naive-nlu/test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/naive-nlu/test.py b/naive-nlu/test.py index 347f6e7..45fd229 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -47,6 +47,12 @@ examples = [ "text": "a wale can swim", "parsed": ("has-capacity", 'wale', 'swim') }, + { + "text": "if earth is a planet, it is big", + "parsed": ("implies", + ("pertenence-to-group", 'earth', 'planet'), + ("exists-property-with-value", 'earth', 'big')), + }, ] base_knowledge = { From 5297158110c1c6594c89c101b636bf37b96d84a2 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 19:04:10 +0200 Subject: [PATCH 02/27] Package as `tree_nlu`. --- naive-nlu/setup.py | 15 ++ naive-nlu/test.py | 157 +----------------- naive-nlu/tree_nlu/__init__.py | 0 naive-nlu/{ => tree_nlu}/depth_meter.py | 2 +- naive-nlu/{ => tree_nlu}/knowledge_base.py | 7 +- .../{ => tree_nlu}/knowledge_evaluation.py | 2 +- .../{ => tree_nlu}/modifiable_property.py | 0 naive-nlu/{ => tree_nlu}/parameters.py | 0 naive-nlu/{ => tree_nlu}/parsing.py | 8 +- naive-nlu/tree_nlu/test.py | 157 ++++++++++++++++++ 10 files changed, 184 insertions(+), 164 deletions(-) create mode 100644 naive-nlu/setup.py create mode 100644 naive-nlu/tree_nlu/__init__.py rename naive-nlu/{ => tree_nlu}/depth_meter.py (92%) rename naive-nlu/{ => tree_nlu}/knowledge_base.py (96%) rename naive-nlu/{ => tree_nlu}/knowledge_evaluation.py (99%) rename naive-nlu/{ => tree_nlu}/modifiable_property.py (100%) rename naive-nlu/{ => tree_nlu}/parameters.py (100%) rename naive-nlu/{ => tree_nlu}/parsing.py (98%) create mode 100644 naive-nlu/tree_nlu/test.py diff --git a/naive-nlu/setup.py b/naive-nlu/setup.py new file mode 100644 index 0000000..8fdc33b --- /dev/null +++ b/naive-nlu/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup + +setup(name='tree_nlu', + version='0.1', + description='Naïve AST based NLU.', + author='kenkeiras', + author_email='kenkeiras@codigoparallevar.com', + packages=['tree_nlu'], + scripts=[ + ], + include_package_data=True, + install_requires = [ + 'jsondiff', + ], + zip_safe=False) diff --git a/naive-nlu/test.py b/naive-nlu/test.py index 45fd229..740652e 100644 --- a/naive-nlu/test.py +++ b/naive-nlu/test.py @@ -1,157 +1,4 @@ -import json -import logging - -logging.getLogger().setLevel(logging.INFO) - -from knowledge_base import KnowledgeBase -from modifiable_property import is_modifiable_property - -examples = [ - { - "text": "icecream is cold", - "parsed": ("exists-property-with-value", 'icecream', 'cold'), - }, - { - "text": "is icecream cold?", - "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) - }, - { - "text": "lava is dangerous", - "parsed": ("exists-property-with-value", 'lava', 'dangerous') - }, - { - "text": "is lava dangerous?", - "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), - }, - { - "text": "earth is a planet", - "parsed": ("pertenence-to-group", 'earth', 'planet'), - }, - { - "text": "io is a moon", - "parsed": ("pertenence-to-group", 'io', 'moon'), - }, - { - "text": "is earth a moon?", - "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), - }, - { - "text": "Green is a color", - "parsed": ("pertenence-to-group", 'green', 'color'), - }, - { - "text": "a plane can fly", - "parsed": ("has-capacity", 'plane', 'fly') - }, - { - "text": "a wale can swim", - "parsed": ("has-capacity", 'wale', 'swim') - }, - { - "text": "if earth is a planet, it is big", - "parsed": ("implies", - ("pertenence-to-group", 'earth', 'planet'), - ("exists-property-with-value", 'earth', 'big')), - }, -] - -base_knowledge = { - 'icecream': { - "groups": set(['noun', 'object', 'comestible', 'sweet']), - }, - 'lava': { - "groups": set(['noun', 'object']), - }, - 'earth': { - "groups": set(['noun', 'object', 'planet']), - }, - 'io': { - "groups": set(['noun', 'object']), - }, - 'green': { - "groups": set(['noun', 'color', 'concept']), - }, - 'plane': { - "groups": set(['noun', 'object', 'vehicle', 'fast']), - }, - 'car': { - "groups": set(['noun', 'object', 'vehicle', 'slow-ish']), - }, - 'wale': { - "groups": set(['noun', 'object', 'living-being']), - }, - 'cold': { - "groups": set(['property', 'temperature']), - "as_property": "temperature", - }, - 'dangerous': { - "groups": set(['property']), - "as_property": "safety", - }, - 'planet': { - "groups": set(['noun', 'group']), - }, - 'moon': { - "groups": set(['noun', 'group']), - }, - 'color': { - "groups": set(['property', 'group']), - }, - 'fly': { - "groups": set(['verb']), - }, - 'swim': { - "groups": set(['verb']), - }, -} - - -def test_assumption(expectedResponse, knowledge, query): - logging.info("Query: {}".format(query['text'])) - logging.info("Expected: {}".format(expectedResponse)) - - result, abstract_tree, diff = knowledge.process(query['text']) - end_result = result.getter() if is_modifiable_property(result) else result - - logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) - assert(end_result == expectedResponse) - - -def main(): - knowledge = KnowledgeBase( - knowledge=base_knowledge, - ) - - differences = knowledge.train(examples) - - logging.info("----") - logging.info(differences()) - logging.info("----") - - test_assumption(True, knowledge, {'text': 'earth is a planet'}) - test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) - for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: - row = test['text'] - result, inferred_tree, differences = knowledge.process(row) - - logging.info("result:", result) - logging.info(differences()) - logging.info("---") - logging.info('-----') - logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) - logging.info('-----') - - queryTrue = { - "text": "is io a moon?", - "parsed": ("question", ("pertenence-to-group", "io", "moon")) - } - queryFalse = { - "text": "is io a planet?", - "parsed": ("question", ("pertenence-to-group", "io", "planet")) - } - - test_assumption(False, knowledge, queryFalse) - test_assumption(True, knowledge, queryTrue) +from tree_nlu import test if __name__ == '__main__': - main() + test.main() diff --git a/naive-nlu/tree_nlu/__init__.py b/naive-nlu/tree_nlu/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/naive-nlu/depth_meter.py b/naive-nlu/tree_nlu/depth_meter.py similarity index 92% rename from naive-nlu/depth_meter.py rename to naive-nlu/tree_nlu/depth_meter.py index db1dab9..3ee624f 100644 --- a/naive-nlu/depth_meter.py +++ b/naive-nlu/tree_nlu/depth_meter.py @@ -1,5 +1,5 @@ import sys -import parameters +from . import parameters def show_depth(depth: int, zoom: int=2): offset = int((parameters.MAX_RECURSIONS - depth) / (2 / zoom)) diff --git a/naive-nlu/knowledge_base.py b/naive-nlu/tree_nlu/knowledge_base.py similarity index 96% rename from naive-nlu/knowledge_base.py rename to naive-nlu/tree_nlu/knowledge_base.py index 31fe4d2..4c27700 100644 --- a/naive-nlu/knowledge_base.py +++ b/naive-nlu/tree_nlu/knowledge_base.py @@ -1,9 +1,10 @@ import copy import logging -import parsing -import knowledge_evaluation -from modifiable_property import is_modifiable_property + +from . import parsing +from . import knowledge_evaluation +from .modifiable_property import is_modifiable_property def diff_knowledge(before, after): diff --git a/naive-nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py similarity index 99% rename from naive-nlu/knowledge_evaluation.py rename to naive-nlu/tree_nlu/knowledge_evaluation.py index 4485adb..a24c07d 100644 --- a/naive-nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -1,4 +1,4 @@ -from modifiable_property import ( +from .modifiable_property import ( ModifiableProperty, ModifiablePropertyWithAst, is_modifiable_property, diff --git a/naive-nlu/modifiable_property.py b/naive-nlu/tree_nlu/modifiable_property.py similarity index 100% rename from naive-nlu/modifiable_property.py rename to naive-nlu/tree_nlu/modifiable_property.py diff --git a/naive-nlu/parameters.py b/naive-nlu/tree_nlu/parameters.py similarity index 100% rename from naive-nlu/parameters.py rename to naive-nlu/tree_nlu/parameters.py diff --git a/naive-nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py similarity index 98% rename from naive-nlu/parsing.py rename to naive-nlu/tree_nlu/parsing.py index 3b6bf09..fa16a33 100644 --- a/naive-nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -1,16 +1,16 @@ #!/usr/bin/env python -import knowledge_evaluation +from . import knowledge_evaluation -import depth_meter +from . import depth_meter import logging import re import copy from functools import reduce from typing import List -from modifiable_property import ModifiableProperty -import parameters +from .modifiable_property import ModifiableProperty +from . import parameters # TODO: more flexible tokenization def to_tokens(text): diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py new file mode 100644 index 0000000..fbd24d8 --- /dev/null +++ b/naive-nlu/tree_nlu/test.py @@ -0,0 +1,157 @@ +import json +import logging + +logging.getLogger().setLevel(logging.INFO) + +from .knowledge_base import KnowledgeBase +from .modifiable_property import is_modifiable_property + +examples = [ + { + "text": "icecream is cold", + "parsed": ("exists-property-with-value", 'icecream', 'cold'), + }, + { + "text": "is icecream cold?", + "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) + }, + { + "text": "lava is dangerous", + "parsed": ("exists-property-with-value", 'lava', 'dangerous') + }, + { + "text": "is lava dangerous?", + "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), + }, + { + "text": "earth is a planet", + "parsed": ("pertenence-to-group", 'earth', 'planet'), + }, + { + "text": "io is a moon", + "parsed": ("pertenence-to-group", 'io', 'moon'), + }, + { + "text": "is earth a moon?", + "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), + }, + { + "text": "Green is a color", + "parsed": ("pertenence-to-group", 'green', 'color'), + }, + { + "text": "a plane can fly", + "parsed": ("has-capacity", 'plane', 'fly') + }, + { + "text": "a wale can swim", + "parsed": ("has-capacity", 'wale', 'swim') + }, + { + "text": "if earth is a planet, it is big", + "parsed": ("implies", + ("pertenence-to-group", 'earth', 'planet'), + ("exists-property-with-value", 'earth', 'big')), + }, +] + +base_knowledge = { + 'icecream': { + "groups": set(['noun', 'object', 'comestible', 'sweet']), + }, + 'lava': { + "groups": set(['noun', 'object']), + }, + 'earth': { + "groups": set(['noun', 'object', 'planet']), + }, + 'io': { + "groups": set(['noun', 'object']), + }, + 'green': { + "groups": set(['noun', 'color', 'concept']), + }, + 'plane': { + "groups": set(['noun', 'object', 'vehicle', 'fast']), + }, + 'car': { + "groups": set(['noun', 'object', 'vehicle', 'slow-ish']), + }, + 'wale': { + "groups": set(['noun', 'object', 'living-being']), + }, + 'cold': { + "groups": set(['property', 'temperature']), + "as_property": "temperature", + }, + 'dangerous': { + "groups": set(['property']), + "as_property": "safety", + }, + 'planet': { + "groups": set(['noun', 'group']), + }, + 'moon': { + "groups": set(['noun', 'group']), + }, + 'color': { + "groups": set(['property', 'group']), + }, + 'fly': { + "groups": set(['verb']), + }, + 'swim': { + "groups": set(['verb']), + }, +} + + +def test_assumption(expectedResponse, knowledge, query): + logging.info("Query: {}".format(query['text'])) + logging.info("Expected: {}".format(expectedResponse)) + + result, abstract_tree, diff = knowledge.process(query['text']) + end_result = result.getter() if is_modifiable_property(result) else result + + logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) + assert(end_result == expectedResponse) + + +def main(): + knowledge = KnowledgeBase( + knowledge=base_knowledge, + ) + + differences = knowledge.train(examples) + + logging.info("----") + logging.info(differences()) + logging.info("----") + + test_assumption(True, knowledge, {'text': 'earth is a planet'}) + test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) + for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: + row = test['text'] + result, inferred_tree, differences = knowledge.process(row) + + logging.info("result:", result) + logging.info(differences()) + logging.info("---") + logging.info('-----') + logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) + logging.info('-----') + + queryTrue = { + "text": "is io a moon?", + "parsed": ("question", ("pertenence-to-group", "io", "moon")) + } + queryFalse = { + "text": "is io a planet?", + "parsed": ("question", ("pertenence-to-group", "io", "planet")) + } + + test_assumption(False, knowledge, queryFalse) + test_assumption(True, knowledge, queryTrue) + +if __name__ == '__main__': + main() From 23ae882161dc41ac3de3dbc48e50a1f0e228f4b9 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 21:57:51 +0200 Subject: [PATCH 03/27] Separated basic test. --- naive-nlu/tree_nlu/test.py | 161 +++--------------------------- naive-nlu/tree_nlu/tests/basic.py | 150 ++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 148 deletions(-) create mode 100644 naive-nlu/tree_nlu/tests/basic.py diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index fbd24d8..d97c2f2 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -1,157 +1,22 @@ -import json import logging +from .tests import basic -logging.getLogger().setLevel(logging.INFO) - -from .knowledge_base import KnowledgeBase -from .modifiable_property import is_modifiable_property - -examples = [ - { - "text": "icecream is cold", - "parsed": ("exists-property-with-value", 'icecream', 'cold'), - }, - { - "text": "is icecream cold?", - "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) - }, - { - "text": "lava is dangerous", - "parsed": ("exists-property-with-value", 'lava', 'dangerous') - }, - { - "text": "is lava dangerous?", - "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), - }, - { - "text": "earth is a planet", - "parsed": ("pertenence-to-group", 'earth', 'planet'), - }, - { - "text": "io is a moon", - "parsed": ("pertenence-to-group", 'io', 'moon'), - }, - { - "text": "is earth a moon?", - "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), - }, - { - "text": "Green is a color", - "parsed": ("pertenence-to-group", 'green', 'color'), - }, - { - "text": "a plane can fly", - "parsed": ("has-capacity", 'plane', 'fly') - }, - { - "text": "a wale can swim", - "parsed": ("has-capacity", 'wale', 'swim') - }, - { - "text": "if earth is a planet, it is big", - "parsed": ("implies", - ("pertenence-to-group", 'earth', 'planet'), - ("exists-property-with-value", 'earth', 'big')), - }, -] - -base_knowledge = { - 'icecream': { - "groups": set(['noun', 'object', 'comestible', 'sweet']), - }, - 'lava': { - "groups": set(['noun', 'object']), - }, - 'earth': { - "groups": set(['noun', 'object', 'planet']), - }, - 'io': { - "groups": set(['noun', 'object']), - }, - 'green': { - "groups": set(['noun', 'color', 'concept']), - }, - 'plane': { - "groups": set(['noun', 'object', 'vehicle', 'fast']), - }, - 'car': { - "groups": set(['noun', 'object', 'vehicle', 'slow-ish']), - }, - 'wale': { - "groups": set(['noun', 'object', 'living-being']), - }, - 'cold': { - "groups": set(['property', 'temperature']), - "as_property": "temperature", - }, - 'dangerous': { - "groups": set(['property']), - "as_property": "safety", - }, - 'planet': { - "groups": set(['noun', 'group']), - }, - 'moon': { - "groups": set(['noun', 'group']), - }, - 'color': { - "groups": set(['property', 'group']), - }, - 'fly': { - "groups": set(['verb']), - }, - 'swim': { - "groups": set(['verb']), - }, -} - - -def test_assumption(expectedResponse, knowledge, query): - logging.info("Query: {}".format(query['text'])) - logging.info("Expected: {}".format(expectedResponse)) - - result, abstract_tree, diff = knowledge.process(query['text']) - end_result = result.getter() if is_modifiable_property(result) else result - - logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) - assert(end_result == expectedResponse) +logging.getLogger().setLevel(logging.WARNING) +tests = ( + ("basic", basic), +) def main(): - knowledge = KnowledgeBase( - knowledge=base_knowledge, - ) + for test_name, test_module in tests: + try: + test_module.main() + print(" ✓ {}".format(test_name)) + except AssertionError: + print(" ✗ {}".format(test_name)) + except Exception as e: + print(" ! {} {}".format(test_name, e)) - differences = knowledge.train(examples) - - logging.info("----") - logging.info(differences()) - logging.info("----") - - test_assumption(True, knowledge, {'text': 'earth is a planet'}) - test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) - for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: - row = test['text'] - result, inferred_tree, differences = knowledge.process(row) - - logging.info("result:", result) - logging.info(differences()) - logging.info("---") - logging.info('-----') - logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) - logging.info('-----') - - queryTrue = { - "text": "is io a moon?", - "parsed": ("question", ("pertenence-to-group", "io", "moon")) - } - queryFalse = { - "text": "is io a planet?", - "parsed": ("question", ("pertenence-to-group", "io", "planet")) - } - - test_assumption(False, knowledge, queryFalse) - test_assumption(True, knowledge, queryTrue) if __name__ == '__main__': main() diff --git a/naive-nlu/tree_nlu/tests/basic.py b/naive-nlu/tree_nlu/tests/basic.py new file mode 100644 index 0000000..eb03ad7 --- /dev/null +++ b/naive-nlu/tree_nlu/tests/basic.py @@ -0,0 +1,150 @@ +import logging +import json + +from ..knowledge_base import KnowledgeBase +from ..modifiable_property import is_modifiable_property + +examples = [ + { + "text": "icecream is cold", + "parsed": ("exists-property-with-value", 'icecream', 'cold'), + }, + { + "text": "is icecream cold?", + "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')) + }, + { + "text": "lava is dangerous", + "parsed": ("exists-property-with-value", 'lava', 'dangerous') + }, + { + "text": "is lava dangerous?", + "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')), + }, + { + "text": "earth is a planet", + "parsed": ("pertenence-to-group", 'earth', 'planet'), + }, + { + "text": "io is a moon", + "parsed": ("pertenence-to-group", 'io', 'moon'), + }, + { + "text": "is earth a moon?", + "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')), + }, + { + "text": "Green is a color", + "parsed": ("pertenence-to-group", 'green', 'color'), + }, + { + "text": "a plane can fly", + "parsed": ("has-capacity", 'plane', 'fly') + }, + { + "text": "a wale can swim", + "parsed": ("has-capacity", 'wale', 'swim') + }, + # { + # "text": "if earth is a planet, it is big", + # "parsed": ("implies", + # ("pertenence-to-group", 'earth', 'planet'), + # ("exists-property-with-value", 'earth', 'big')), + # }, +] + +base_knowledge = { + 'icecream': { + "groups": set(['noun', 'object', 'comestible', 'sweet']), + }, + 'lava': { + "groups": set(['noun', 'object']), + }, + 'earth': { + "groups": set(['noun', 'object', 'planet']), + }, + 'io': { + "groups": set(['noun', 'object']), + }, + 'green': { + "groups": set(['noun', 'color', 'concept']), + }, + 'plane': { + "groups": set(['noun', 'object', 'vehicle', 'fast']), + }, + 'car': { + "groups": set(['noun', 'object', 'vehicle', 'slow-ish']), + }, + 'wale': { + "groups": set(['noun', 'object', 'living-being']), + }, + 'cold': { + "groups": set(['property', 'temperature']), + "as_property": "temperature", + }, + 'dangerous': { + "groups": set(['property']), + "as_property": "safety", + }, + 'planet': { + "groups": set(['noun', 'group']), + }, + 'moon': { + "groups": set(['noun', 'group']), + }, + 'color': { + "groups": set(['property', 'group']), + }, + 'fly': { + "groups": set(['verb']), + }, + 'swim': { + "groups": set(['verb']), + }, +} + +def test_assumption(expectedResponse, knowledge, query): + logging.info("Query: {}".format(query['text'])) + logging.info("Expected: {}".format(expectedResponse)) + + result, abstract_tree, diff = knowledge.process(query['text']) + end_result = result.getter() if is_modifiable_property(result) else result + + logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) + assert(end_result == expectedResponse) + +def main(): + knowledge = KnowledgeBase( + knowledge=base_knowledge, + ) + + differences = knowledge.train(examples) + + logging.info("----") + logging.info(differences()) + logging.info("----") + + test_assumption(True, knowledge, {'text': 'earth is a planet'}) + test_assumption(True, knowledge, {'text': 'is lava dangerous?'}) + for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]: + row = test['text'] + result, inferred_tree, differences = knowledge.process(row) + + logging.info("result:", result) + logging.info(differences()) + logging.info("---") + logging.info('-----') + logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4)) + logging.info('-----') + + queryTrue = { + "text": "is io a moon?", + "parsed": ("question", ("pertenence-to-group", "io", "moon")) + } + queryFalse = { + "text": "is io a planet?", + "parsed": ("question", ("pertenence-to-group", "io", "planet")) + } + + test_assumption(False, knowledge, queryFalse) + test_assumption(True, knowledge, queryTrue) From d6628101deab5647d8abc6391b5c26f146b979ce Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 22:16:27 +0200 Subject: [PATCH 04/27] Base gac 100. --- naive-nlu/tree_nlu/test.py | 8 +- naive-nlu/tree_nlu/tests/basic.py | 3 +- naive-nlu/tree_nlu/tests/gac_100.py | 637 ++++++++++++++++++++++++++++ 3 files changed, 644 insertions(+), 4 deletions(-) create mode 100644 naive-nlu/tree_nlu/tests/gac_100.py diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index d97c2f2..c7e1a6e 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -1,10 +1,12 @@ import logging from .tests import basic +from .tests import gac_100 logging.getLogger().setLevel(logging.WARNING) tests = ( ("basic", basic), + ("gac 100", gac_100), ) def main(): @@ -12,11 +14,11 @@ def main(): try: test_module.main() print(" ✓ {}".format(test_name)) - except AssertionError: - print(" ✗ {}".format(test_name)) + except AssertionError as ae: + print(" ✗ {}: {}".format(test_name, ae.args[0])) except Exception as e: print(" ! {} {}".format(test_name, e)) - + raise if __name__ == '__main__': main() diff --git a/naive-nlu/tree_nlu/tests/basic.py b/naive-nlu/tree_nlu/tests/basic.py index eb03ad7..ba09ce2 100644 --- a/naive-nlu/tree_nlu/tests/basic.py +++ b/naive-nlu/tree_nlu/tests/basic.py @@ -111,7 +111,8 @@ def test_assumption(expectedResponse, knowledge, query): end_result = result.getter() if is_modifiable_property(result) else result logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result)) - assert(end_result == expectedResponse) + if end_result != expectedResponse: + raise AssertionError('{} is not {}'.format(end_result, expectedResponse)) def main(): knowledge = KnowledgeBase( diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py new file mode 100644 index 0000000..77d7139 --- /dev/null +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -0,0 +1,637 @@ +from ..knowledge_base import KnowledgeBase + +examples = [ + { + "text": "is icecream cold?", + "affirmation": "icecream is cold", + "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')), + "answer": True, + }, + # { + # "text": "is earth a planet?", + # "affirmation": "is earth a planet?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is green a color?", + # "affirmation": "Is green a color?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do airplanes fly?", + # "affirmation": "do airplanes fly?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is it hot during the summer?", + # "affirmation": "Is it hot during the summer?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is chile in south america ?", + # "affirmation": "is chile in south america ?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Was Socrates a man?", + # "affirmation": "Was Socrates a man?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Computers use electricity?", + # "affirmation": "Computers use electricity?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "The dominant language in france is french?", + # "affirmation": "The dominant language in france is french?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "was abraham lincoln once president of the united states?", + # "affirmation": "was abraham lincoln once president of the united states?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is milk white?", + # "affirmation": "Is milk white?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do people have emotions?", + # "affirmation": "do people have emotions?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do objects appear smaller as they move away from you?", + # "affirmation": "do objects appear smaller as they move away from you?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Does the human species have a male and female gender?", + # "affirmation": "Does the human species have a male and female gender?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is a mountain mostly made of rock?", + # "affirmation": "Is a mountain mostly made of rock?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is sun microsystems a computer company?", + # "affirmation": "is sun microsystems a computer company?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do you see with your eyes and smell with your nose?", + # "affirmation": "Do you see with your eyes and smell with your nose?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is smoking bad for your health?", + # "affirmation": "Is smoking bad for your health?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Does a dog have four legs?", + # "affirmation": "Does a dog have four legs?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do mammals have hearts?", + # "affirmation": "Do mammals have hearts?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is the Earth a planet?", + # "affirmation": "is the Earth a planet?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is water a liquid?", + # "affirmation": "Is water a liquid?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is Bugs Bunny a cartoon character?", + # "affirmation": "Is Bugs Bunny a cartoon character?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do Humans communicate by Telephone?", + # "affirmation": "Do Humans communicate by Telephone?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is beer a drink ?", + # "affirmation": "is beer a drink ?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "are there 12 months in a year?", + # "affirmation": "are there 12 months in a year?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "does the sun hurt your eyes when you look at it?", + # "affirmation": "does the sun hurt your eyes when you look at it?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do most cars have doors?", + # "affirmation": "Do most cars have doors?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is orange both a fruit and a colour?", + # "affirmation": "is orange both a fruit and a colour?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is water a necessity?", + # "affirmation": "Is water a necessity?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do CDs have better quality sound than Cassettes?", + # "affirmation": "Do CDs have better quality sound than Cassettes?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do animals die?", + # "affirmation": "do animals die?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is the arctic cold?", + # "affirmation": "Is the arctic cold?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do people have 2 eyes?", + # "affirmation": "Do people have 2 eyes?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "does a person have a brain?", + # "affirmation": "does a person have a brain?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is the rain wet?", + # "affirmation": "Is the rain wet?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is division a mathematical operation?", + # "affirmation": "Is division a mathematical operation?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is 400 greater than 399?", + # "affirmation": "is 400 greater than 399?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is magenta a color?", + # "affirmation": "is magenta a color?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are books educational?", + # "affirmation": "Are books educational?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Was the Great Wall of China built by humans?", + # "affirmation": "Was the Great Wall of China built by humans?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are pianos musical instruments?", + # "affirmation": "Are pianos musical instruments?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Has Bill Clinton been President of the United States?", + # "affirmation": "Has Bill Clinton been President of the United States?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is a whale a mammal?", + # "affirmation": "Is a whale a mammal?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are lemons yellow?", + # "affirmation": "Are lemons yellow?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is the South Pole cold?", + # "affirmation": "Is the South Pole cold?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is Africa warm?", + # "affirmation": "Is Africa warm?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is Antarctica cold?", + # "affirmation": "Is Antarctica cold?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is rock is generally harder than wood?", + # "affirmation": "Is rock is generally harder than wood?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do dogs chase cats?", + # "affirmation": "Do dogs chase cats?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "can humans die from cold temperatures?", + # "affirmation": "can humans die from cold temperatures?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do people enjoy conversation?", + # "affirmation": "do people enjoy conversation?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is Bill Clinton the President of the United States?", + # "affirmation": "Is Bill Clinton the President of the United States?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are books a good source of information?", + # "affirmation": "Are books a good source of information?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "are friends different than enemies?", + # "affirmation": "are friends different than enemies?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "are people alive?", + # "affirmation": "are people alive?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do triangles have 3 sides?", + # "affirmation": "Do triangles have 3 sides?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is Ice cream cold?", + # "affirmation": "Is Ice cream cold?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are all sides of a square the same length?", + # "affirmation": "Are all sides of a square the same length?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do all people eat food?", + # "affirmation": "Do all people eat food?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do dentists repair teeth?", + # "affirmation": "do dentists repair teeth?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is America bigger than Japan?", + # "affirmation": "Is America bigger than Japan?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do all triangles have three sides?", + # "affirmation": "Do all triangles have three sides?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "A grocery store sales food?", + # "affirmation": "A grocery store sales food?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Does a sunburn cause pain?", + # "affirmation": "Does a sunburn cause pain?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is a computer an invention?", + # "affirmation": "Is a computer an invention?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "have humans visited the moon?", + # "affirmation": "have humans visited the moon?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are there people in India?", + # "affirmation": "Are there people in India?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Was Einstein a genius?", + # "affirmation": "Was Einstein a genius?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are we on the planet earth?", + # "affirmation": "Are we on the planet earth?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "do people comb their hair in the morning?", + # "affirmation": "do people comb their hair in the morning?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Does it hurt to lose a friend?", + # "affirmation": "Does it hurt to lose a friend?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are there people on the earth?", + # "affirmation": "Are there people on the earth?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Was George Washington a president of the United States of America?", + # "affirmation": "Was George Washington a president of the United States of America?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Does an ocean have salt water in it?", + # "affirmation": "Does an ocean have salt water in it?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is night darker than day?", + # "affirmation": "Is night darker than day?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Does a triangle have three sides?", + # "affirmation": "Does a triangle have three sides?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are peaches fruit?", + # "affirmation": "Are peaches fruit?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do people urinate?", + # "affirmation": "Do people urinate?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is Germany located in Europe?", + # "affirmation": "Is Germany located in Europe?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do mirrors reflect light?", + # "affirmation": "Do mirrors reflect light?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are people born naked?", + # "affirmation": "Are people born naked?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is it hot near the equator?", + # "affirmation": "Is it hot near the equator?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is paper made from trees?", + # "affirmation": "is paper made from trees?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Can a female have children?", + # "affirmation": "Can a female have children?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are people born every day?", + # "affirmation": "Are people born every day?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are shoes worn on the feet?", + # "affirmation": "Are shoes worn on the feet?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "does it get wet when it rains?", + # "affirmation": "does it get wet when it rains?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are there plants and insects in the rainforest which have no names?", + # "affirmation": "Are there plants and insects in the rainforest which have no names?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do people eat pigs?", + # "affirmation": "Do people eat pigs?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do businessmen wear ties?", + # "affirmation": "Do businessmen wear ties?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is New York in the United States?", + # "affirmation": "Is New York in the United States?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are humans more intelligent than ants?", + # "affirmation": "Are humans more intelligent than ants?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are ravens black?", + # "affirmation": "Are ravens black?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Are there rats on ships?", + # "affirmation": "Are there rats on ships?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "are lions animals?", + # "affirmation": "are lions animals?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "6 is greater than 5?", + # "affirmation": "6 is greater than 5?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Is water made of hydrogen and oxygen?", + # "affirmation": "Is water made of hydrogen and oxygen?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "is the sky blue on a clear day?", + # "affirmation": "is the sky blue on a clear day?", + # "parsed": (), + # "answer": None, + # }, + # { + # "text": "Do most people work during the day?", + # "affirmation": "Do most people work during the day?", + # "parsed": (), + # "answer": None, + # }, +] + +base_knowledge = { + 'icecream': { + "groups": set(['noun', 'object', 'comestible', 'sweet']), + }, + "cold": { + "groups": set(['property', 'temperature']), + "as_property": "temperature", + } +} + +def main(): + knowledge = KnowledgeBase( + knowledge=base_knowledge, + ) + + affirmations = [ + { + 'text': x['affirmation'], + 'parsed': x['parsed'][1], + } + for x in examples + ] + questions = examples + + differences = knowledge.train(affirmations) + differences = knowledge.train(questions) + + for example in examples: + result, _, _ = knowledge.process(example['text']) + + if result != example['answer']: + raise AssertionError('{} is not {}'.format(result, example['answer'])) From 6693b7deb01a34db33816b8a3b18734291104b1c Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:16:19 +0200 Subject: [PATCH 05/27] Remove need from `as_property` info. Probably this can be improved upon if the data is later analyzed with it's similars. --- naive-nlu/tree_nlu/knowledge_base.py | 1 + naive-nlu/tree_nlu/knowledge_evaluation.py | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_base.py b/naive-nlu/tree_nlu/knowledge_base.py index 4c27700..e00bc0d 100644 --- a/naive-nlu/tree_nlu/knowledge_base.py +++ b/naive-nlu/tree_nlu/knowledge_base.py @@ -49,6 +49,7 @@ class KnowledgeBase(object): def process(self, row): + row = row.lower() knowledge_before = copy.deepcopy(self.knowledge) logging.info("\x1b[7;32m> {} \x1b[0m".format(row)) tokens = parsing.to_tokens(row) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index a24c07d..ed48f85 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -41,17 +41,32 @@ def get_subquery_type(knowledge_base, atom): def property_for_value(knowledge_base, value): - return knowledge_base[value]['as_property'] + if 'as_property' in knowledge_base[value]: + return knowledge_base[value]['as_property'] + + return knowledge_base[value].get('groups', set(['noun'])) def modifiable_property_from_property(prop, path, value): def getter(): nonlocal prop, path, value - return (path in prop) and prop[path] == value + if isinstance(path, set): + # If the property is from a set, it's true if any possible + # path has a element as true + return any(map(lambda possible_path: ((possible_path in prop) + and + (prop[possible_path] == value)), + path)) + else: + return (path in prop) and prop[path] == value def setter(): nonlocal prop, path, value - prop[path] = value + if isinstance(path, set): + for possible_path in path: + prop[possible_path] = value + else: + prop[path] = value return ModifiableProperty( getter=getter, From 460ad73bbafc636f726584d5ce83392fb5c6a726 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:17:44 +0200 Subject: [PATCH 06/27] Handle the possibility of remixes not working. --- naive-nlu/tree_nlu/parsing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index fa16a33..080aaa0 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -295,6 +295,8 @@ def reprocess_language_knowledge(knowledge_base, examples): def reverse_remix(tree_section, remix): result_section = [] for origin in remix: + if origin >= len(tree_section): + return None result_section.append(copy.deepcopy(tree_section[origin])) return result_section + tree_section[len(remix):] @@ -332,6 +334,9 @@ def resolve_fit(knowledge, fit, remaining_recursions): else: ((result_type, remixer), tokens) = element remixed_tokens = reverse_remix(tokens, remixer) + if remixed_tokens is None: + return None + minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1) if minifit is None: return None From 586ac76d1fb9f6a2aa9458bd873ecc3bf71c6049 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:18:02 +0200 Subject: [PATCH 07/27] Default to ERROR logging on tests. --- naive-nlu/tree_nlu/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index c7e1a6e..021e2fc 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -2,7 +2,7 @@ import logging from .tests import basic from .tests import gac_100 -logging.getLogger().setLevel(logging.WARNING) +logging.getLogger().setLevel(logging.ERROR) tests = ( ("basic", basic), From e6e81464780c2e3fa367b0efd53fdd2f895c4da2 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:24:44 +0200 Subject: [PATCH 08/27] Allow learning from unparsed data in tests. --- naive-nlu/tree_nlu/knowledge_evaluation.py | 17 ++- naive-nlu/tree_nlu/tests/gac_100.py | 126 +++++++++++++-------- 2 files changed, 93 insertions(+), 50 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index ed48f85..bcce527 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -41,10 +41,21 @@ def get_subquery_type(knowledge_base, atom): def property_for_value(knowledge_base, value): - if 'as_property' in knowledge_base[value]: - return knowledge_base[value]['as_property'] + if value in knowledge_base: + # Annotate the property as property + groups = knowledge_base[value].get('groups', set(['property'])) + groups.add('property') + knowledge_base[value]['groups'] = groups - return knowledge_base[value].get('groups', set(['noun'])) + # And find the property "name" + if 'as_property' in knowledge_base[value]: + return knowledge_base[value]['as_property'] + + return knowledge_base[value].get('groups', set(['property'])) + else: + # Consider that any property is... a property + knowledge_base[value] = {'groups': {'property'}} + return {'property'} def modifiable_property_from_property(prop, path, value): diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 77d7139..ef68dfb 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -1,30 +1,38 @@ from ..knowledge_base import KnowledgeBase examples = [ - { - "text": "is icecream cold?", - "affirmation": "icecream is cold", - "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')), - "answer": True, - }, - # { - # "text": "is earth a planet?", - # "affirmation": "is earth a planet?", - # "parsed": (), - # "answer": None, - # }, - # { - # "text": "Is green a color?", - # "affirmation": "Is green a color?", - # "parsed": (), - # "answer": None, - # }, - # { - # "text": "do airplanes fly?", - # "affirmation": "do airplanes fly?", - # "parsed": (), - # "answer": None, - # }, + ('full_example', + { + "text": "is icecream cold?", + "affirmation": "icecream is cold", + "parsed": ("question", + ("exists-property-with-value", 'icecream', 'cold')), + "answer": True, + }), + ('full_example', + { + "text": "is earth a planet?", + "affirmation": "earth is a planet", + "parsed": ("question", + ("pertenence-to-group", 'earth', 'planet')), + "answer": True, + }), + ('full_example', + { + "text": "Is green a color?", + "affirmation": "green is a color", + "parsed": ("question", + ("pertenence-to-group", 'green', 'color')), + "answer": True, + }), + ('full_example', + { + "text": "do airplanes fly?", + "affirmation": "airplanes fly", + "parsed": ("question", + ("has-capacity", 'plane', 'fly')), + "answer": True, + }), # { # "text": "Is it hot during the summer?", # "affirmation": "Is it hot during the summer?", @@ -61,12 +69,12 @@ examples = [ # "parsed": (), # "answer": None, # }, - # { - # "text": "Is milk white?", - # "affirmation": "Is milk white?", - # "parsed": (), - # "answer": None, - # }, + ('text_example', + { + "question": "Is milk white?", + "affirmation": "milk is white", + "answer": True, + }), # { # "text": "do people have emotions?", # "affirmation": "do people have emotions?", @@ -607,10 +615,24 @@ base_knowledge = { 'icecream': { "groups": set(['noun', 'object', 'comestible', 'sweet']), }, - "cold": { + 'cold': { "groups": set(['property', 'temperature']), - "as_property": "temperature", - } + }, + 'earth': { + "groups": set(['noun', 'object', 'planet']), + }, + 'planet': { + "groups": set(['noun', 'group']), + }, + 'color': { + "groups": set(['property', 'group']), + }, + 'green': { + "groups": set(['noun', 'color', 'concept']), + }, + 'fly': { + "groups": set(['verb']), + }, } def main(): @@ -618,20 +640,30 @@ def main(): knowledge=base_knowledge, ) - affirmations = [ - { - 'text': x['affirmation'], - 'parsed': x['parsed'][1], - } - for x in examples - ] - questions = examples + for example_type, data in examples: + if example_type == 'full_example': + affirmation = { + 'text': data['affirmation'], + 'parsed': data['parsed'][1], + } + question = data + differences = knowledge.train([affirmation]) + differences = knowledge.train([question]) - differences = knowledge.train(affirmations) - differences = knowledge.train(questions) + result, _, _ = knowledge.process(data['text']) - for example in examples: - result, _, _ = knowledge.process(example['text']) + if result != data['answer']: + raise AssertionError('{} is not {}'.format(result, data['answer'])) - if result != example['answer']: - raise AssertionError('{} is not {}'.format(result, example['answer'])) + elif example_type == 'text_example': + affirmation = data['affirmation'] + question = data['question'] + + _, _, _ = knowledge.process(affirmation) + result, _, _ = knowledge.process(question) + + if result != data['answer']: + raise AssertionError('{} is not {}'.format(result, data['answer'])) + + else: + raise NotImplementedError('Example type: {}'.format(example_type)) From 3cfc03373f2f58dfb0c0f540b83ddf33ab3246dd Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:29:34 +0200 Subject: [PATCH 09/27] Use {set} notation for sets. --- naive-nlu/tree_nlu/knowledge_evaluation.py | 4 +-- naive-nlu/tree_nlu/tests/basic.py | 30 +++++++++++----------- naive-nlu/tree_nlu/tests/gac_100.py | 14 +++++----- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index bcce527..9d77e67 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -43,7 +43,7 @@ def get_subquery_type(knowledge_base, atom): def property_for_value(knowledge_base, value): if value in knowledge_base: # Annotate the property as property - groups = knowledge_base[value].get('groups', set(['property'])) + groups = knowledge_base[value].get('groups', {'property'}) groups.add('property') knowledge_base[value]['groups'] = groups @@ -51,7 +51,7 @@ def property_for_value(knowledge_base, value): if 'as_property' in knowledge_base[value]: return knowledge_base[value]['as_property'] - return knowledge_base[value].get('groups', set(['property'])) + return knowledge_base[value].get('groups', {'property'}) else: # Consider that any property is... a property knowledge_base[value] = {'groups': {'property'}} diff --git a/naive-nlu/tree_nlu/tests/basic.py b/naive-nlu/tree_nlu/tests/basic.py index ba09ce2..450e7e0 100644 --- a/naive-nlu/tree_nlu/tests/basic.py +++ b/naive-nlu/tree_nlu/tests/basic.py @@ -55,51 +55,51 @@ examples = [ base_knowledge = { 'icecream': { - "groups": set(['noun', 'object', 'comestible', 'sweet']), + "groups": {'noun', 'object', 'comestible', 'sweet'}, }, 'lava': { - "groups": set(['noun', 'object']), + "groups": {'noun', 'object'}, }, 'earth': { - "groups": set(['noun', 'object', 'planet']), + "groups": {'noun', 'object', 'planet'}, }, 'io': { - "groups": set(['noun', 'object']), + "groups": {'noun', 'object'}, }, 'green': { - "groups": set(['noun', 'color', 'concept']), + "groups": {'noun', 'color', 'concept'}, }, 'plane': { - "groups": set(['noun', 'object', 'vehicle', 'fast']), + "groups": {'noun', 'object', 'vehicle', 'fast'}, }, 'car': { - "groups": set(['noun', 'object', 'vehicle', 'slow-ish']), + "groups": {'noun', 'object', 'vehicle', 'slow-ish'}, }, 'wale': { - "groups": set(['noun', 'object', 'living-being']), + "groups": {'noun', 'object', 'living-being'}, }, 'cold': { - "groups": set(['property', 'temperature']), + "groups": {'property', 'temperature'}, "as_property": "temperature", }, 'dangerous': { - "groups": set(['property']), + "groups": {'property'}, "as_property": "safety", }, 'planet': { - "groups": set(['noun', 'group']), + "groups": {'noun', 'group'}, }, 'moon': { - "groups": set(['noun', 'group']), + "groups": {'noun', 'group'}, }, 'color': { - "groups": set(['property', 'group']), + "groups": {'property', 'group'}, }, 'fly': { - "groups": set(['verb']), + "groups": {'verb'}, }, 'swim': { - "groups": set(['verb']), + "groups": {'verb'}, }, } diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index ef68dfb..34452a6 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -613,25 +613,25 @@ examples = [ base_knowledge = { 'icecream': { - "groups": set(['noun', 'object', 'comestible', 'sweet']), + "groups": {'noun', 'object', 'comestible', 'sweet'}, }, 'cold': { - "groups": set(['property', 'temperature']), + "groups": {'property', 'temperature'}, }, 'earth': { - "groups": set(['noun', 'object', 'planet']), + "groups": {'noun', 'object', 'planet'}, }, 'planet': { - "groups": set(['noun', 'group']), + "groups": {'noun', 'group'}, }, 'color': { - "groups": set(['property', 'group']), + "groups": {'property', 'group'}, }, 'green': { - "groups": set(['noun', 'color', 'concept']), + "groups": {'noun', 'color', 'concept'}, }, 'fly': { - "groups": set(['verb']), + "groups": {'verb'}, }, } From 0b52ade6b54ea8849d083b2f03f17f53c1a2570f Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:32:19 +0200 Subject: [PATCH 10/27] Small colorization on the test interface. --- naive-nlu/tree_nlu/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index 021e2fc..e6c0102 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -13,11 +13,11 @@ def main(): for test_name, test_module in tests: try: test_module.main() - print(" ✓ {}".format(test_name)) + print(" \x1b[1;32m✓\x1b[0m {}".format(test_name)) except AssertionError as ae: - print(" ✗ {}: {}".format(test_name, ae.args[0])) + print(" \x1b[1;31m✗\x1b[0m {}: {}".format(test_name, ae.args[0])) except Exception as e: - print(" ! {} {}".format(test_name, e)) + print(" \x1b[1;7;31m!\x1b[0m {} {}".format(test_name, e)) raise if __name__ == '__main__': From 22534160c987be849bd65fb4633f0b5054fcbf7e Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Tue, 23 May 2017 23:34:33 +0200 Subject: [PATCH 11/27] On fail or exception exit with non-zero code. Also, on exceptions print the exception stacktrace. --- naive-nlu/tree_nlu/test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index e6c0102..92addcb 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -1,3 +1,4 @@ +import traceback import logging from .tests import basic from .tests import gac_100 @@ -10,15 +11,22 @@ tests = ( ) def main(): + failed = False for test_name, test_module in tests: try: test_module.main() print(" \x1b[1;32m✓\x1b[0m {}".format(test_name)) except AssertionError as ae: print(" \x1b[1;31m✗\x1b[0m {}: {}".format(test_name, ae.args[0])) + failed = True + except Exception as e: print(" \x1b[1;7;31m!\x1b[0m {} {}".format(test_name, e)) - raise + failed = True + traceback.print_exc() + + if failed: + exit(1) if __name__ == '__main__': main() From d029ecd91deeeee8679ee0eb3038edfe6c7bdba5 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 20:13:42 +0200 Subject: [PATCH 12/27] Implication example. --- naive-nlu/tree_nlu/knowledge_evaluation.py | 17 +++++++++++++++++ naive-nlu/tree_nlu/tests/gac_100.py | 20 ++++++++++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index 9d77e67..eb4e7c9 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -154,12 +154,29 @@ def question(knowledge_base, elements, subj): return subj.getter() return subj +def implies(knowledge_base, elements, precedent, consequent): + precedent = resolve(knowledge_base, elements, precedent) + consequent = resolve(knowledge_base, elements, consequent) + + if precedent not in knowledge_base: + knowledge_base[precedent] = {} + + if "implications" not in knowledge_base[precedent]: + knowledge_base[precedent]["implications"] = set() + + return modifiable_element_for_existance_in_set( + container=knowledge_base[precedent], + set_name="implications", + element=consequent + ) + knowledge_ingestion = { "exists-property-with-value": exists_property_with_value, "pertenence-to-group": pertenence_to_group, "has-capacity": has_capacity, "question": question, + "implies": implies, } diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 34452a6..eb3fe39 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -33,12 +33,14 @@ examples = [ ("has-capacity", 'plane', 'fly')), "answer": True, }), - # { - # "text": "Is it hot during the summer?", - # "affirmation": "Is it hot during the summer?", - # "parsed": (), - # "answer": None, - # }, + ('full_example', + { + "text": "Is it hot during the summer?", + "affirmation": "it is hot during the summer", + "parsed": ("question", + ("implies", 'summer', 'hot')), + "answer": True, + }), # { # "text": "is chile in south america ?", # "affirmation": "is chile in south america ?", @@ -618,6 +620,12 @@ base_knowledge = { 'cold': { "groups": {'property', 'temperature'}, }, + 'hot': { + "groups": {'property', 'temperature'}, + }, + 'summer': { + "groups": {'epoch'}, + }, 'earth': { "groups": {'noun', 'object', 'planet'}, }, From bbba6b75e169ed3a6d3d2d35b4f5edc11a2c52c0 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 20:30:50 +0200 Subject: [PATCH 13/27] Make remix model more powerful. Accept elements in the remix that are not present in the subtrees. --- naive-nlu/tree_nlu/parsing.py | 31 +++++++++++++++++++---------- naive-nlu/tree_nlu/test.py | 4 +++- naive-nlu/tree_nlu/tests/gac_100.py | 2 +- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index 080aaa0..6e1fe30 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -127,7 +127,11 @@ def integrate_language(knowledge_base, example): def apply_remix(tokens, remix): rebuilt = [] for i in remix: - rebuilt.append(tokens[i]) + if isinstance(i, int): + rebuilt.append(tokens[i]) + else: + assert(isinstance(i, str)) + rebuilt.append(i) return rebuilt @@ -154,13 +158,14 @@ def get_possible_remixes(knowledge_base, matcher, similar_matcher): for element in matcher: logging.debug("- {}".format(element)) logging.debug("+ {}".format(similar_matcher)) - assert(element in similar_matcher or isinstance(element, dict)) - - if isinstance(element, dict): - indexes = all_matching_indexes(knowledge_base, similar_matcher, element) + if element in similar_matcher or isinstance(element, dict): + if isinstance(element, dict): + indexes = all_matching_indexes(knowledge_base, similar_matcher, element) + else: + indexes = all_indexes(similar_matcher, element) + matrix.append(indexes) else: - indexes = all_indexes(similar_matcher, element) - matrix.append(indexes) + matrix.append([element]) # TODO: do some scoring to find the most "interesting combination" return [list(x) for x in list(zip(*matrix))] @@ -294,10 +299,16 @@ def reprocess_language_knowledge(knowledge_base, examples): def reverse_remix(tree_section, remix): result_section = [] + offset = 0 for origin in remix: - if origin >= len(tree_section): - return None - result_section.append(copy.deepcopy(tree_section[origin])) + if isinstance(origin, int): + if origin >= len(tree_section): + return None + + result_section.append(copy.deepcopy(tree_section[origin + offset])) + else: + assert(isinstance(origin, str)) + offset += 1 return result_section + tree_section[len(remix):] diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index 92addcb..caaacdd 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -17,7 +17,9 @@ def main(): test_module.main() print(" \x1b[1;32m✓\x1b[0m {}".format(test_name)) except AssertionError as ae: - print(" \x1b[1;31m✗\x1b[0m {}: {}".format(test_name, ae.args[0])) + print(" \x1b[1;31m✗\x1b[0m {}: {}".format(test_name, + ae.args[0] if len(ae.args) > 0 + else '\b\b \b')) failed = True except Exception as e: diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index eb3fe39..a1d7760 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -36,7 +36,7 @@ examples = [ ('full_example', { "text": "Is it hot during the summer?", - "affirmation": "it is hot during the summer", + "affirmation": "it is hot during summer", "parsed": ("question", ("implies", 'summer', 'hot')), "answer": True, From a99449c04a095e738827b84bb6845294f359a5f8 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 20:42:15 +0200 Subject: [PATCH 14/27] Add property-has-value example. --- naive-nlu/tree_nlu/knowledge_evaluation.py | 19 +++++++++++++++++++ naive-nlu/tree_nlu/tests/gac_100.py | 17 +++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index eb4e7c9..0869728 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -171,12 +171,31 @@ def implies(knowledge_base, elements, precedent, consequent): ) +def property_has_value(knowledge_base, elements, subj, prop, value): + subj = resolve(knowledge_base, elements, subj) + prop = resolve(knowledge_base, elements, prop) + value = resolve(knowledge_base, elements, value) + + if subj not in knowledge_base: + knowledge_base[subj] = {} + + if prop not in knowledge_base[subj]: + knowledge_base[subj][prop] = set() + + return modifiable_element_for_existance_in_set( + container=knowledge_base[subj], + set_name=prop, + element=value + ) + + knowledge_ingestion = { "exists-property-with-value": exists_property_with_value, "pertenence-to-group": pertenence_to_group, "has-capacity": has_capacity, "question": question, "implies": implies, + "property-has-value": property_has_value, } diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index a1d7760..680f909 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -41,12 +41,14 @@ examples = [ ("implies", 'summer', 'hot')), "answer": True, }), - # { - # "text": "is chile in south america ?", - # "affirmation": "is chile in south america ?", - # "parsed": (), - # "answer": None, - # }, + ('full_example', + { + "text": "is chile in south america ?", + "affirmation": "chile is in south america", + "parsed": ("question", + ("property-has-value", 'chile', 'location', 'south america')), + "answer": True, + }), # { # "text": "Was Socrates a man?", # "affirmation": "Was Socrates a man?", @@ -641,6 +643,9 @@ base_knowledge = { 'fly': { "groups": {'verb'}, }, + 'chile': { + "groups": {'noun'}, + } } def main(): From e51ba71ec5b2c1c708e2a0cc4bbde6b748eb756c Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 20:42:45 +0200 Subject: [PATCH 15/27] Add after_execution mechanism to gac100 test. --- naive-nlu/tree_nlu/tests/gac_100.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 680f909..29d5ead 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -668,6 +668,10 @@ def main(): if result != data['answer']: raise AssertionError('{} is not {}'.format(result, data['answer'])) + if "after_execution" in data: + for f in data["after_execution"]: + f(knowledge) + elif example_type == 'text_example': affirmation = data['affirmation'] question = data['question'] From 4d7afb01745df7b24b8db2f6b5c3d0a39422a9bd Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 20:42:54 +0200 Subject: [PATCH 16/27] Add set-capable json dumper. --- naive-nlu/tree_nlu/utils/json_dumper.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 naive-nlu/tree_nlu/utils/json_dumper.py diff --git a/naive-nlu/tree_nlu/utils/json_dumper.py b/naive-nlu/tree_nlu/utils/json_dumper.py new file mode 100644 index 0000000..061dd68 --- /dev/null +++ b/naive-nlu/tree_nlu/utils/json_dumper.py @@ -0,0 +1,4 @@ +def dumper(obj): + if isinstance(obj, set): + return list(obj) + return obj From 02f909269a380697fe39e5d82a6021d441ca3cbc Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 21:09:23 +0200 Subject: [PATCH 17/27] Use after_execution mechanism for test asserts. --- naive-nlu/tree_nlu/tests/gac_100.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 29d5ead..b5021ec 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -1,5 +1,11 @@ from ..knowledge_base import KnowledgeBase +def _assert(args): + assert(args) + +def _assert_msg(args, msg): + assert(args, msg) + examples = [ ('full_example', { @@ -8,6 +14,9 @@ examples = [ "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold')), "answer": True, + "after_execution": [( + lambda knowledge: _assert('cold' in knowledge.knowledge['icecream']['property']) + ),], }), ('full_example', { @@ -16,6 +25,9 @@ examples = [ "parsed": ("question", ("pertenence-to-group", 'earth', 'planet')), "answer": True, + "after_execution": [( + lambda knowledge: _assert('planet' in knowledge.knowledge['earth']['groups']) + ),], }), ('full_example', { @@ -24,6 +36,9 @@ examples = [ "parsed": ("question", ("pertenence-to-group", 'green', 'color')), "answer": True, + "after_execution": [( + lambda knowledge: _assert('color' in knowledge.knowledge['green']['groups']) + ),], }), ('full_example', { @@ -32,6 +47,9 @@ examples = [ "parsed": ("question", ("has-capacity", 'plane', 'fly')), "answer": True, + "after_execution": [( + lambda knowledge: _assert('fly' in knowledge.knowledge['plane']['capacities']) + ),], }), ('full_example', { @@ -40,6 +58,9 @@ examples = [ "parsed": ("question", ("implies", 'summer', 'hot')), "answer": True, + "after_execution": [( + lambda knowledge: _assert('hot' in knowledge.knowledge['summer']['implications']) + ),], }), ('full_example', { From 2bfe676b2d855e83999934c397885f2e740aa753 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:02:32 +0200 Subject: [PATCH 18/27] Integrate the knowledge ASAP. If we do this before the parsing we can leverage that semantics in the matching phase. --- naive-nlu/tree_nlu/knowledge_base.py | 7 +++++++ naive-nlu/tree_nlu/knowledge_evaluation.py | 15 ++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_base.py b/naive-nlu/tree_nlu/knowledge_base.py index e00bc0d..31c84a1 100644 --- a/naive-nlu/tree_nlu/knowledge_base.py +++ b/naive-nlu/tree_nlu/knowledge_base.py @@ -24,6 +24,13 @@ class KnowledgeBase(object): # Parse everything parsed_examples = [] for example in examples: + # If there's parsed data, leverage it ASAP + if 'parsed' in example: + result = knowledge_evaluation.integrate_information(self.knowledge, { + "parsed": example['parsed'], + }) + self.act_upon(result) + logging.info("\x1b[7;32m> {} \x1b[0m".format(example)) tokens, decomposition, inferred_tree = parsing.integrate_language(self, example) logging.info(tokens) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index 0869728..4a49faa 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -123,12 +123,17 @@ def pertenence_to_group(knowledge_base, elements, subj, group): if "groups" not in knowledge_base[subj]: knowledge_base[subj]["groups"] = set() - return modifiable_element_for_existance_in_set( - container=knowledge_base[subj], - set_name="groups", - element=group - ) + if group not in knowledge_base: + knowledge_base[group] = {} + if "groups" not in knowledge_base[group]: + knowledge_base[group]["groups"] = set() + + return modifiable_element_for_existance_in_group( + container=knowledge_base[subj], + element=group, + backlink=knowledge_base[group], + ) def has_capacity(knowledge_base, elements, subj, capacity): subj = resolve(knowledge_base, elements, subj) From cbeefcf76ba798d3cdf00c9c33284f5ef6197e35 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:05:46 +0200 Subject: [PATCH 19/27] Identify group "concepts" on-flight. --- naive-nlu/tree_nlu/knowledge_evaluation.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index 4a49faa..5cc4f65 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -113,6 +113,21 @@ def modifiable_element_for_existance_in_set(container, set_name, element): setter=setter, ) +def modifiable_element_for_existance_in_group(container, element, backlink, set_name='groups'): + def getter(): + nonlocal container, element, backlink, set_name + return (set_name in container) and (element in container[set_name]) + + def setter(): + nonlocal container, set_name, element + backlink['groups'].add(set_name) + return container[set_name].add(element) + + return ModifiableProperty( + getter=getter, + setter=setter, + ) + def pertenence_to_group(knowledge_base, elements, subj, group): subj = resolve(knowledge_base, elements, subj) group = resolve(knowledge_base, elements, group) From 75d690120bbc2325441512fe0decf1e47a6e9c41 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:06:18 +0200 Subject: [PATCH 20/27] Improve error reporting on tests. --- naive-nlu/tree_nlu/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index caaacdd..ec57f8b 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -17,13 +17,13 @@ def main(): test_module.main() print(" \x1b[1;32m✓\x1b[0m {}".format(test_name)) except AssertionError as ae: - print(" \x1b[1;31m✗\x1b[0m {}: {}".format(test_name, - ae.args[0] if len(ae.args) > 0 - else '\b\b \b')) + print(" \x1b[1;31m✗\x1b[0m {}".format(test_name, + (' : [Assertion] {}'.format(ae.args[0])) if len(ae.args) > 0 + else '')) failed = True except Exception as e: - print(" \x1b[1;7;31m!\x1b[0m {} {}".format(test_name, e)) + print(" \x1b[1;7;31m!\x1b[0m {} : [Exception] {}".format(test_name, e)) failed = True traceback.print_exc() From e6cbb5438298a1ea1bc7dc661c4ade2bc01295aa Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:09:26 +0200 Subject: [PATCH 21/27] Also use matching tokens to score tree similarity. --- naive-nlu/tree_nlu/parsing.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index 6e1fe30..ed5903a 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -88,7 +88,7 @@ def integrate_language(knowledge_base, example): for position, atom in lower_levels: logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom)) - similar = get_similar_tree(knowledge_base, atom) + similar = get_similar_tree(knowledge_base, atom, tokens) remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar) _, matcher, result = make_template(knowledge_base, tokens, atom) logging.debug("Tx: {}".format(tokens)) @@ -138,7 +138,7 @@ def apply_remix(tokens, remix): def build_remix_matrix(knowledge_base, tokens, atom, similar): tokens = list(tokens) tokens, matcher, result = make_template(knowledge_base, tokens, atom) - similar_matcher, similar_result, similar_result_resolved, _ = similar + similar_matcher, similar_result, similar_result_resolved, _, _ = similar start_bounds, end_bounds = find_bounds(matcher, similar_matcher) @@ -219,7 +219,7 @@ def find_bounds(matcher, similar_matcher): return start_bounds, end_bounds -def get_similar_tree(knowledge_base, atom): +def get_similar_tree(knowledge_base, atom, tokens): possibilities = [] # Find matching possibilities @@ -243,12 +243,17 @@ def get_similar_tree(knowledge_base, atom): raw)) # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element - score = sum([resolved[i] == atom[i] + atom_score = sum([resolved[i] == atom[i] for i in range(min(len(resolved), len(atom)))]) - sorted_possibilities.append((raw, possibility, resolved, score)) - sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True) + token_score = sum([similar_token in tokens + for similar_token + in raw]) + + sorted_possibilities.append((raw, possibility, resolved, atom_score, token_score)) + + sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3] * 100 + p[4], reverse=True) if len(sorted_possibilities) < 1: return None From 7cdf8a310de9d70d8cdc32d383f3c5c95f5e4d97 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:10:17 +0200 Subject: [PATCH 22/27] Unroll get_matching last list-comprehension. --- naive-nlu/tree_nlu/parsing.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index ed5903a..a179dd4 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -278,14 +278,23 @@ def get_matching(sample, other): x[0][i][0] == sample[0][i][0], other)) - return [sample[0][x] if isinstance(sample[0][x], str) - else - sample[0][x] if isinstance(sample[0][x], tuple) - else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b, - map(lambda y: y[0][x]['groups'], - other))} - for x - in range(l)] + matching = [] + for x in range(l): # Generate the combination of this and other(s) matcher + first_sample_data = sample[0][x] + if isinstance(first_sample_data, str): + matching.append(first_sample_data) + elif isinstance(first_sample_data, tuple): + matching.append(first_sample_data) + else: + this_groups = sample[0][x]['groups'] + if len(other) > 0: + other_groups = reduce(lambda a, b: a & b, + map(lambda y: y[0][x]['groups'], + other)) + this_groups = this_groups & other_groups + + matching.append({'groups': this_groups}) + return matching def reprocess_language_knowledge(knowledge_base, examples): From 89b281fd6f36764b1849e63281c30e7c93ce0f62 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:15:56 +0200 Subject: [PATCH 23/27] Lean on knowledge too when defining bounds. --- naive-nlu/tree_nlu/parsing.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index a179dd4..13436bd 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -8,7 +8,7 @@ import re import copy from functools import reduce -from typing import List +from typing import List, Dict from .modifiable_property import ModifiableProperty from . import parameters @@ -95,6 +95,8 @@ def integrate_language(knowledge_base, example): logging.debug("Mx: {}".format(matcher)) logging.debug("Rx: {}".format(result)) logging.debug("Remix: {}".format(remix)) + logging.debug("Sx: {}".format(start_bounds)) + logging.debug("Ex: {}".format(end_bounds)) after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix) assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens)) @@ -140,7 +142,7 @@ def build_remix_matrix(knowledge_base, tokens, atom, similar): tokens, matcher, result = make_template(knowledge_base, tokens, atom) similar_matcher, similar_result, similar_result_resolved, _, _ = similar - start_bounds, end_bounds = find_bounds(matcher, similar_matcher) + start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher) for i, element in (end_bounds + start_bounds[::-1]): matcher.pop(i) @@ -195,13 +197,21 @@ def all_matching_indexes(knowledge_base, collection, element): instance = knowledge_base.knowledge[instance]["groups"] intersection = set(instance) & set(element) - if len(intersection) > 0: + if (len(intersection) > 0 or (0 == len(instance) == len(element))): indexes.append((i, intersection)) return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)] -def find_bounds(matcher, similar_matcher): +def element_matches_groups(knowledge, element: Dict, groups): + if isinstance(groups, str) and groups in knowledge: + return len(knowledge[element].get("groups", set()) & element['groups']) > 0 + elif isinstance(groups, dict): + return len(element.get("groups", set()) & element['groups']) > 0 + return False + + +def find_bounds(knowledge, matcher, similar_matcher): start_bounds = [] for i, element in enumerate(matcher): if element in similar_matcher: @@ -211,7 +221,15 @@ def find_bounds(matcher, similar_matcher): end_bounds = [] for i, element in enumerate(matcher[::-1]): - if element in similar_matcher: + in_similar = False + if isinstance(element, str): + in_similar = element in similar_matcher + elif isinstance(element, dict): + in_similar = any(map(lambda groups: element_matches_groups(knowledge.knowledge, + element, groups), + similar_matcher)) + + if in_similar: break else: end_bounds.append((len(matcher) - (i + 1), element)) From 9ed43aa36204722d41363b0d159d489e0365db3b Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:16:30 +0200 Subject: [PATCH 24/27] Return None when a matrix cannot be applied. --- naive-nlu/tree_nlu/parsing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/naive-nlu/tree_nlu/parsing.py b/naive-nlu/tree_nlu/parsing.py index 13436bd..265bd59 100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -130,6 +130,8 @@ def apply_remix(tokens, remix): rebuilt = [] for i in remix: if isinstance(i, int): + if i >= len(tokens): + return None rebuilt.append(tokens[i]) else: assert(isinstance(i, str)) From a7f70d2888fa346e0e5fd5192b1737240fbb7f42 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:17:57 +0200 Subject: [PATCH 25/27] Unlock 7th GAC 100. --- naive-nlu/tree_nlu/tests/gac_100.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index b5021ec..554c4b6 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -69,13 +69,21 @@ examples = [ "parsed": ("question", ("property-has-value", 'chile', 'location', 'south america')), "answer": True, + "after_execution": [( + lambda knowledge: _assert('south america' in knowledge.knowledge['chile']['location']) + ),], + }), + ('full_example', + { + "text": "Was Socrates a man?", + "affirmation": "Socrates was a man", + "parsed": ("question", + ("pertenence-to-group", 'socrates', 'man')), + "answer": True, + "after_execution": [( + lambda knowledge: _assert('man' in knowledge.knowledge['socrates']['groups']) + ),], }), - # { - # "text": "Was Socrates a man?", - # "affirmation": "Was Socrates a man?", - # "parsed": (), - # "answer": None, - # }, # { # "text": "Computers use electricity?", # "affirmation": "Computers use electricity?", @@ -666,7 +674,7 @@ base_knowledge = { }, 'chile': { "groups": {'noun'}, - } + }, } def main(): From 8e304b2a09902a01d67ded3a32056ccc9e511df6 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:25:53 +0200 Subject: [PATCH 26/27] Always create the "groups" set for new elements. This allows a smaller initial knowledge base. --- naive-nlu/tree_nlu/knowledge_evaluation.py | 8 ++++---- naive-nlu/tree_nlu/tests/gac_100.py | 12 ------------ 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/naive-nlu/tree_nlu/knowledge_evaluation.py b/naive-nlu/tree_nlu/knowledge_evaluation.py index 5cc4f65..524bd56 100644 --- a/naive-nlu/tree_nlu/knowledge_evaluation.py +++ b/naive-nlu/tree_nlu/knowledge_evaluation.py @@ -133,13 +133,13 @@ def pertenence_to_group(knowledge_base, elements, subj, group): group = resolve(knowledge_base, elements, group) if subj not in knowledge_base: - knowledge_base[subj] = {} + knowledge_base[subj] = {'groups': set()} if "groups" not in knowledge_base[subj]: knowledge_base[subj]["groups"] = set() if group not in knowledge_base: - knowledge_base[group] = {} + knowledge_base[group] = {'groups': set()} if "groups" not in knowledge_base[group]: knowledge_base[group]["groups"] = set() @@ -179,7 +179,7 @@ def implies(knowledge_base, elements, precedent, consequent): consequent = resolve(knowledge_base, elements, consequent) if precedent not in knowledge_base: - knowledge_base[precedent] = {} + knowledge_base[precedent] = {'groups': set()} if "implications" not in knowledge_base[precedent]: knowledge_base[precedent]["implications"] = set() @@ -197,7 +197,7 @@ def property_has_value(knowledge_base, elements, subj, prop, value): value = resolve(knowledge_base, elements, value) if subj not in knowledge_base: - knowledge_base[subj] = {} + knowledge_base[subj] = {'groups': set()} if prop not in knowledge_base[subj]: knowledge_base[subj][prop] = set() diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 554c4b6..5716568 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -648,33 +648,21 @@ base_knowledge = { 'icecream': { "groups": {'noun', 'object', 'comestible', 'sweet'}, }, - 'cold': { - "groups": {'property', 'temperature'}, - }, 'hot': { "groups": {'property', 'temperature'}, }, 'summer': { "groups": {'epoch'}, }, - 'earth': { - "groups": {'noun', 'object', 'planet'}, - }, 'planet': { "groups": {'noun', 'group'}, }, - 'color': { - "groups": {'property', 'group'}, - }, 'green': { "groups": {'noun', 'color', 'concept'}, }, 'fly': { "groups": {'verb'}, }, - 'chile': { - "groups": {'noun'}, - }, } def main(): From e0a5f02c34eecff1a7f4ef8046c6bccc15f9c814 Mon Sep 17 00:00:00 2001 From: kenkeiras Date: Wed, 24 May 2017 22:37:44 +0200 Subject: [PATCH 27/27] Add progress bar visuals to tests. --- naive-nlu/tree_nlu/test.py | 6 +++--- naive-nlu/tree_nlu/tests/gac_100.py | 14 +++++++++++++- naive-nlu/tree_nlu/utils/visuals.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 naive-nlu/tree_nlu/utils/visuals.py diff --git a/naive-nlu/tree_nlu/test.py b/naive-nlu/tree_nlu/test.py index ec57f8b..810e3c8 100644 --- a/naive-nlu/tree_nlu/test.py +++ b/naive-nlu/tree_nlu/test.py @@ -17,13 +17,13 @@ def main(): test_module.main() print(" \x1b[1;32m✓\x1b[0m {}".format(test_name)) except AssertionError as ae: - print(" \x1b[1;31m✗\x1b[0m {}".format(test_name, - (' : [Assertion] {}'.format(ae.args[0])) if len(ae.args) > 0 + print(" \x1b[1;31m✗\x1b[0m {}{}".format(test_name, + ('\n [Assertion] {}'.format(ae.args[0])) if len(ae.args) > 0 else '')) failed = True except Exception as e: - print(" \x1b[1;7;31m!\x1b[0m {} : [Exception] {}".format(test_name, e)) + print(" \x1b[1;7;31m!\x1b[0m {}\n [Exception] {}".format(test_name, e)) failed = True traceback.print_exc() diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 5716568..b2c31e0 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -1,4 +1,5 @@ from ..knowledge_base import KnowledgeBase +from ..utils.visuals import show_progbar def _assert(args): assert(args) @@ -670,14 +671,20 @@ def main(): knowledge=base_knowledge, ) - for example_type, data in examples: + total = len(examples) + + for i, (example_type, data) in enumerate(examples): if example_type == 'full_example': affirmation = { 'text': data['affirmation'], 'parsed': data['parsed'][1], } question = data + + show_progbar(i, total, data['affirmation']) differences = knowledge.train([affirmation]) + + show_progbar(i, total, data['text']) differences = knowledge.train([question]) result, _, _ = knowledge.process(data['text']) @@ -690,7 +697,10 @@ def main(): f(knowledge) elif example_type == 'text_example': + show_progbar(i, total, data['affirmation']) affirmation = data['affirmation'] + + show_progbar(i, total, data['question']) question = data['question'] _, _, _ = knowledge.process(affirmation) @@ -701,3 +711,5 @@ def main(): else: raise NotImplementedError('Example type: {}'.format(example_type)) + + print("\r\x1b[K", end='') diff --git a/naive-nlu/tree_nlu/utils/visuals.py b/naive-nlu/tree_nlu/utils/visuals.py new file mode 100644 index 0000000..a6dd611 --- /dev/null +++ b/naive-nlu/tree_nlu/utils/visuals.py @@ -0,0 +1,15 @@ +def show_progbar(done, total, msg=''): + total_blocks = 10 + blocks_done = (done * total_blocks) // total + blocks_to_go = total_blocks - blocks_done + + print('\r\x1b[K' # Go to the start of the line + '\x1b[0m' # Restart the "style" + '|' # Put the first "|" + + blocks_done * '█' # Completed blocks + + blocks_to_go * ' ' # Uncompleted blocks + + '\x1b[7m|\x1b[0m' # End the bar + + ' ' + + msg # Add message + + '\r' # Go back to the start + , end='')