Add (non-passing) tokenization.
This commit is contained in:
parent
75174e1736
commit
fc37450565
7 changed files with 229 additions and 11 deletions
19
naive-nlu/tree_nlu/utils/tokenization.py
Normal file
19
naive-nlu/tree_nlu/utils/tokenization.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Training fixtures for the basic tokenizer: each entry pairs an input
# "text" with the "tokens" sequence the tokenizer is expected to produce.
# Note the third example shows that trailing punctuation ('?') is expected
# to be split into its own token.
BASIC_TOKENIZATION_EXAMPLES = (
    {
        "text": 'cat',
        "tokens": ['cat'],
    },
    {
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    },
    {
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    },
)
|
||||
|
||||
|
||||
def train_basic_tokenization(knowledge_base):
    """Train *knowledge_base*'s tokenizer on every basic example.

    Each example in ``BASIC_TOKENIZATION_EXAMPLES`` is passed, one at a
    time, to ``knowledge_base.train_tokenizer``.
    """
    train = knowledge_base.train_tokenizer
    for sample in BASIC_TOKENIZATION_EXAMPLES:
        train(sample)
|
Loading…
Add table
Add a link
Reference in a new issue