Exploration of layers for tokenization and parsing.

2018-04-23 22:48:10 +02:00 · 2018-04-23 22:48:10 +02:00 · a444766c7c
commit a444766c7c
parent c18c9b8cb1
10 changed files with 173 additions and 108 deletions
--- a/naive-nlu/tree_nlu/tests/gac_100.py
+++ b/naive-nlu/tree_nlu/tests/gac_100.py
@@ -99,14 +99,14 @@ examples = [
             lambda knowledge: _assert('electricity' in knowledge.knowledge['computers']['performs-over']['use'])
         ),],
     }),
-    ('full_example',
-     {
-         "text": "The dominant language in france is french?",
-         "affirmation": "The dominant language in france is french",
-         "parsed": ("question",
-                    ("property-has-value", "france", "dominant-language", "french")),
-         "answer": True,
-     }),
+    # ('full_example',
+    #  {
+    #      "text": "The dominant language in france is french?",
+    #      "affirmation": "The dominant language in france is french",
+    #      "parsed": ("question",
+    #                 ("property-has-value", "france", "dominant-language", "french")),
+    #      "answer": True,
+    #  }),
    # {
    #     "text": "was abraham lincoln once president of the united states?",
    #     "affirmation": "was abraham lincoln once president of the united states?",
--- a/naive-nlu/tree_nlu/tests/tokenization.py
+++ b/naive-nlu/tree_nlu/tests/tokenization.py
@@ -63,11 +63,11 @@ def main():
        show_progbar(i, total, example['text'])
        if case_type == 'example':
            with session().log(example['text']):
-                knowledge.train_tokenizer(example)
+                knowledge.layers.tokenization.train(example)

        elif case_type == 'test':
            with session().log(example['text']):
-                tokens = list(knowledge.tokenize(example['text']))
+                tokens = list(knowledge.layers.tokenization.tokenize(example['text']))

                session().log('Expected “{}”, found “{}”'
                            .format(example['tokens'], tokens))