def can_be_used_in_place(knowledge, token, minisegment):
    """Tell whether `token` may stand where `minisegment` is expected.

    `token` is looked up in `knowledge.knowledge`; an unknown token never
    matches.  Otherwise the 'groups' entry of the token's info is compared
    with the 'groups' entry of `minisegment` (each defaulting to an empty
    set when absent).  The token matches when both collections share at
    least one group, or when neither of them has any group.

    Returns True on a match, False otherwise.
    """
    known_words = knowledge.knowledge
    if token not in known_words:
        return False

    token_groups = known_words[token].get('groups', set())
    wanted_groups = minisegment.get('groups', set())

    # Either there is a common group, or both sides are group-less.
    return bool(token_groups & wanted_groups) or (
        not token_groups and not wanted_groups
    )
100644 --- a/naive-nlu/tree_nlu/parsing.py +++ b/naive-nlu/tree_nlu/parsing.py @@ -395,8 +395,7 @@ def is_definite_minisegment(minisegment): def match_token(knowledge, next_token, minisegment): if isinstance(minisegment, dict): - # TODO: check if the dictionary matches the values - return True + return knowledge_evaluation.can_be_used_in_place(knowledge, next_token, minisegment) elif isinstance(minisegment, str): # TODO: check if the two elements can be used in each other place return next_token == minisegment diff --git a/naive-nlu/tree_nlu/tests/basic.py b/naive-nlu/tree_nlu/tests/basic.py index 450e7e0..d5d959a 100644 --- a/naive-nlu/tree_nlu/tests/basic.py +++ b/naive-nlu/tree_nlu/tests/basic.py @@ -98,6 +98,12 @@ base_knowledge = { 'fly': { "groups": {'verb'}, }, + 'bus': { + "groups": {'noun'}, + }, + 'run': { + "groups": {'verb'}, + }, 'swim': { "groups": {'verb'}, }, diff --git a/naive-nlu/tree_nlu/tests/gac_100.py b/naive-nlu/tree_nlu/tests/gac_100.py index 5e331b9..daaa696 100644 --- a/naive-nlu/tree_nlu/tests/gac_100.py +++ b/naive-nlu/tree_nlu/tests/gac_100.py @@ -1,3 +1,4 @@ +import logging from ..knowledge_base import KnowledgeBase from ..utils.visuals import show_progbar @@ -109,7 +110,7 @@ examples = [ # }, ('text_example', { - "question": "Is milk white?", + "question": "is milk white?", "affirmation": "milk is white", "answer": True, }), @@ -662,9 +663,15 @@ base_knowledge = { 'planet': { "groups": {'noun', 'group'}, }, + 'white': { + "groups": {'noun', 'color', 'concept', 'property'}, + }, 'green': { "groups": {'noun', 'color', 'concept'}, }, + 'milk': { + "groups": {'noun'}, + }, 'fly': { "groups": {'verb'}, }, @@ -713,11 +720,12 @@ def main(): elif example_type == 'text_example': show_progbar(i, total, data['affirmation']) affirmation = data['affirmation'] + logging.debug("Processing affirmation: {}".format(affirmation)) + _, _, _ = knowledge.process(affirmation) show_progbar(i, total, data['question']) question = data['question'] - - _, _, _ = 
knowledge.process(affirmation) + logging.debug("Processing question : {}".format(question)) result, _, _ = knowledge.process(question) if result != data['answer']: