20 lines
467 B
Python
20 lines
467 B
Python
|
# Sample table consumed by train_basic_tokenization(): each entry is a dict
# with a raw "text" string and the "tokens" list associated with it.
# The entries cover a single word, whitespace-separated words, and a
# sentence whose trailing '?' is listed as its own token.
BASIC_TOKENIZATION_EXAMPLES = (
    {
        "text": 'cat',
        "tokens": ['cat'],
    },
    {
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    },
    {
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    },
)
|
||
|
|
||
|
|
||
|
def train_basic_tokenization(knowledge_base):
    """Pass every entry of BASIC_TOKENIZATION_EXAMPLES to the knowledge base.

    Args:
        knowledge_base: Object exposing ``train_tokenizer(example)``. It is
            called once per example dict, in table order. What it does with
            each example is defined by that object, not by this function.
    """
    for example in BASIC_TOKENIZATION_EXAMPLES:
        knowledge_base.train_tokenizer(example)
|