Add (non-passing) tokenization.
parent 75174e1736
commit fc37450565
7 changed files with 229 additions and 11 deletions
@@ -3,6 +3,7 @@ import json
 
 from ..knowledge_base import KnowledgeBase
 from ..modifiable_property import is_modifiable_property
+from ..utils.tokenization import train_basic_tokenization
 
 examples = [
     {
@@ -107,6 +108,9 @@ base_knowledge = {
     'swim': {
         "groups": {'verb'},
     },
+    'planet': {
+        'groups': {'noun'}
+    }
 }
 
 def test_assumption(expectedResponse, knowledge, query):
@@ -125,6 +129,8 @@ def main():
         knowledge=base_knowledge,
     )
 
+    train_basic_tokenization(knowledge)
+
     for example in examples:
         with session().log(example['text']):
             differences = knowledge.train([example])
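The body of train_basic_tokenization is not part of these hunks. As a rough sketch of what the call above is assumed to do, it would feed a fixed set of (text, tokens) pairs into the knowledge base's tokenizer training entry point (train_tokenizer, the method exercised by the new test file below):

    # Hypothetical sketch only: the real utils/tokenization.py is not shown
    # in this diff. Assumes KnowledgeBase.train_tokenizer accepts the same
    # {"text": ..., "tokens": ...} dicts used in tests/tokenization.py.
    BASIC_TOKENIZATION_EXAMPLES = (
        {"text": 'cat', "tokens": ['cat']},
        {"text": 'text separated by spaces',
         "tokens": ['text', 'separated', 'by', 'spaces']},
    )

    def train_basic_tokenization(knowledge_base):
        for example in BASIC_TOKENIZATION_EXAMPLES:
            knowledge_base.train_tokenizer(example)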
67 naive-nlu/tree_nlu/tests/tokenization.py Normal file
@@ -0,0 +1,67 @@
+from ..session.org_mode import global_session as session
+from ..knowledge_base import KnowledgeBase
+from ..utils.visuals import show_progbar
+from ..visualization import show_knowledge
+
+
+def _assert(args):
+    assert(args)
+
+
+def _assert_msg(args, msg):
+    assert args, msg
+
+
+EXAMPLES = [
+    ('example', {
+        "text": 'cat',
+        "tokens": ['cat'],
+    }),
+    ('example', {
+        "text": 'cats',
+        "tokens": ['cats'],
+        "meaning": { 'cats': ('add-modifier', 'cat', 'plural') },
+    }),
+    ('example', {
+        "text": 'text separated by spaces',
+        "tokens": ['text', 'separated', 'by', 'spaces'],
+    }),
+
+    ('test', {
+        "text": 'plane',
+        "tokens": ['plane'],
+    }),
+    ('test', {
+        "text": 'planes',
+        "tokens": ['planes'],
+        "meaning": { 'planes': ('add-modifier', 'plane', 'plural') },
+    }),
+    ('test', {
+        "text": 'some other text',
+        "tokens": ['some', 'other', 'text'],
+    })
+]
+
+
+def main():
+    knowledge = KnowledgeBase()
+
+    total = len(EXAMPLES)
+
+    for i, (case_type, example) in enumerate(EXAMPLES):
+        show_progbar(i, total, example['text'])
+        if case_type == 'example':
+            with session().log(example['text']):
+                knowledge.train_tokenizer(example)
+
+        elif case_type == 'test':
+            with session().log(example['text']):
+                tokens = list(knowledge.tokenize(example['text']))
+
+                assert example['tokens'] == tokens
+
+        else:
+            raise Exception('Not implemented case {}'.format(case_type))
+
+    print("\r\x1b[K", end='')
+    return knowledge
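As the commit title notes, these tests do not pass yet. For reference, a minimal stand-in that would satisfy the plain-splitting cases (but not the plural "meaning" mappings) is simple whitespace tokenization; this is an illustrative baseline, not the KnowledgeBase implementation:

    # Illustrative baseline only: plain whitespace splitting passes the
    # multi-word cases above, while the "meaning" entries
    # ('cats' -> ('add-modifier', 'cat', 'plural')) call for a learned
    # mapping from plural forms back to their stems.
    def naive_tokenize(text):
        return text.split()

    assert naive_tokenize('some other text') == ['some', 'other', 'text']
    assert naive_tokenize('planes') == ['planes']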