# Each example pairs an input string with the token sequence it should
# produce: text is split on whitespace, and punctuation such as '?' becomes
# its own token.
BASIC_TOKENIZATION_EXAMPLES = (
    ({
        "text": 'cat',
        "tokens": ['cat'],
    }),
    ({
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    }),
    ({
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    }),
)


def train_basic_tokenization(knowledge_base):
    # Feed every basic example to the knowledge base's tokenizer trainer.
    for example in BASIC_TOKENIZATION_EXAMPLES:
        knowledge_base.train_tokenizer(example)
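

# --- Usage sketch (illustrative; not part of the original module) ---
# train_basic_tokenization only assumes an object exposing a
# train_tokenizer(example) method. The stub below is a hypothetical
# stand-in used to show the call shape; a real knowledge base would
# update its tokenizer model rather than just record the examples.
class _DemoKnowledgeBase:
    def __init__(self):
        self.trained_examples = []

    def train_tokenizer(self, example):
        # Stand-in for real tokenizer training: just record the example.
        self.trained_examples.append(example)


if __name__ == "__main__":
    kb = _DemoKnowledgeBase()
    train_basic_tokenization(kb)
    print(f"trained on {len(kb.trained_examples)} examples")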