Exploration of layers for tokenization and parsing.

This commit is contained in:
kenkeiras 2018-04-23 22:48:10 +02:00
parent c18c9b8cb1
commit a444766c7c
10 changed files with 173 additions and 108 deletions

View file

@@ -99,14 +99,14 @@ examples = [
lambda knowledge: _assert('electricity' in knowledge.knowledge['computers']['performs-over']['use'])
),],
}),
('full_example',
{
"text": "The dominant language in france is french?",
"affirmation": "The dominant language in france is french",
"parsed": ("question",
("property-has-value", "france", "dominant-language", "french")),
"answer": True,
}),
# ('full_example',
# {
# "text": "The dominant language in france is french?",
# "affirmation": "The dominant language in france is french",
# "parsed": ("question",
# ("property-has-value", "france", "dominant-language", "french")),
# "answer": True,
# }),
# {
# "text": "was abraham lincoln once president of the united states?",
# "affirmation": "was abraham lincoln once president of the united states?",

View file

@@ -63,11 +63,11 @@ def main():
show_progbar(i, total, example['text'])
if case_type == 'example':
with session().log(example['text']):
knowledge.train_tokenizer(example)
knowledge.layers.tokenization.train(example)
elif case_type == 'test':
with session().log(example['text']):
tokens = list(knowledge.tokenize(example['text']))
tokens = list(knowledge.layers.tokenization.tokenize(example['text']))
session().log('Expected “{}”, found “{}'
.format(example['tokens'], tokens))