Add (non-passing) tokenization.

kenkeiras 2018-04-01 20:24:09 +02:00
parent 75174e1736
commit fc37450565
7 changed files with 229 additions and 11 deletions

@@ -0,0 +1,19 @@
BASIC_TOKENIZATION_EXAMPLES = (
    ({
        "text": 'cat',
        "tokens": ['cat'],
    }),
    ({
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    }),
    ({
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    }),
)

def train_basic_tokenization(knowledge_base):
    for example in BASIC_TOKENIZATION_EXAMPLES:
        knowledge_base.train_tokenizer(example)
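
The commit message flags this tokenization as non-passing, so the examples above are training data for behavior that does not work yet. As a minimal sketch of how they might be verified, assuming a hypothetical knowledge-base object that exposes the train_tokenizer method seen in this diff plus a tokenize method (the latter is an assumption, not shown in the commit):

def check_basic_tokenization(knowledge_base):
    # Train on every example, then tokenize each text and compare
    # against the expected token list.
    train_basic_tokenization(knowledge_base)
    for example in BASIC_TOKENIZATION_EXAMPLES:
        result = knowledge_base.tokenize(example["text"])  # assumed API
        # Given the "(non-passing)" commit message, these assertions
        # would be expected to fail at this point in the history.
        assert result == example["tokens"], \
            "expected {!r}, got {!r}".format(example["tokens"], result)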