Pass tests using tokenization.

This commit is contained in:
kenkeiras 2018-04-15 21:10:49 +02:00
parent 45cc3a8a31
commit 1306306723
4 changed files with 30 additions and 14 deletions

View File

@ -406,12 +406,22 @@ def all_indexes(collection, element):
def all_matching_indexes(knowledge_base, collection, element): def all_matching_indexes(knowledge_base, collection, element):
indexes = [] indexes = []
with session().log('Matching “{}'.format(element)):
assert("groups" in element) assert("groups" in element)
element = element["groups"] element = element["groups"]
for i, instance in enumerate(collection): for i, instance in enumerate(collection):
session().log('Checking “{}'.format(instance))
if isinstance(instance, dict): if isinstance(instance, dict):
instance = instance["groups"] instance = instance["groups"]
elif instance in knowledge_base.knowledge: elif instance in knowledge_base.knowledge:
session().log('Knowledge about “{}”: ”{}'.format(instance, knowledge_base.knowledge[instance]))
if "groups" not in knowledge_base.knowledge[instance]:
# This means that it is only known as a token,
# so we should try to avoid using it
continue
instance = knowledge_base.knowledge[instance]["groups"] instance = knowledge_base.knowledge[instance]["groups"]
intersection = set(instance) & set(element) intersection = set(instance) & set(element)
@ -422,6 +432,7 @@ def all_matching_indexes(knowledge_base, collection, element):
def element_matches_groups(knowledge, element: Dict, groups): def element_matches_groups(knowledge, element: Dict, groups):
with session().log("Checking if e “{}” matches groups “{}".format(element, groups)):
if isinstance(groups, str) and groups in knowledge: if isinstance(groups, str) and groups in knowledge:
return len(knowledge[groups].get("groups", set()) & element['groups']) > 0 return len(knowledge[groups].get("groups", set()) & element['groups']) > 0
elif isinstance(groups, dict): elif isinstance(groups, dict):

View File

@ -11,9 +11,9 @@ logging.getLogger().setLevel(logging.ERROR)
tests = ( tests = (
("tokenization", tokenization), ("tokenization", tokenization),
# ("basic", basic), ("basic", basic),
# ("gac 100", gac_100), ("gac 100", gac_100),
# ("gac+", gac_extension), ("gac+", gac_extension),
) )

View File

@ -668,6 +668,10 @@ base_knowledge = {
'electricity': { 'electricity': {
"groups": {'power'}, "groups": {'power'},
}, },
'airplanes': {},
'white': {
'groups': {'property'},
}
} }
def main(): def main():

View File

@ -22,4 +22,5 @@ def ask_then_learn_test(knowledge: KnowledgeBase):
def main(): def main():
knowledge = gac_100.main() knowledge = gac_100.main()
knowledge.knowledge['blue'] = {'groups': {'property'}}
knowledge = ask_then_learn_test(knowledge) knowledge = ask_then_learn_test(knowledge)