68 lines
1.6 KiB
Python
68 lines
1.6 KiB
Python
|
from ..session.org_mode import global_session as session
|
||
|
from ..knowledge_base import KnowledgeBase
|
||
|
from ..utils.visuals import show_progbar
|
||
|
from ..visualization import show_knowledge
|
||
|
|
||
|
|
||
|
def _assert(args):
|
||
|
assert(args)
|
||
|
|
||
|
|
||
|
def _assert_msg(args, msg):
|
||
|
assert args, msg
|
||
|
|
||
|
|
||
|
# Tokenization cases as (case_type, payload) pairs.
#
# main() feeds every 'example' payload to KnowledgeBase.train_tokenizer() and,
# for every 'test' payload, compares KnowledgeBase.tokenize(payload["text"])
# against payload["tokens"]. The optional "meaning" entry is not read by the
# code in this file; presumably it is consumed during training -- confirm.
EXAMPLES = [
    # --- training cases ---
    ("example", {"text": "cat", "tokens": ["cat"]}),
    ("example", {
        "text": "cats",
        "tokens": ["cats"],
        "meaning": {"cats": ("add-modifier", "cat", "plural")},
    }),
    ("example", {
        "text": "text separated by spaces",
        "tokens": ["text", "separated", "by", "spaces"],
    }),

    # --- verification cases ---
    ("test", {"text": "plane", "tokens": ["plane"]}),
    ("test", {
        "text": "planes",
        "tokens": ["planes"],
        "meaning": {"planes": ("add-modifier", "plane", "plural")},
    }),
    ("test", {
        "text": "some other text",
        "tokens": ["some", "other", "text"],
    }),
]
|
||
|
|
||
|
|
||
|
def main():
    """Run every entry of ``EXAMPLES`` against a fresh ``KnowledgeBase``.

    'example' entries are passed to ``train_tokenizer``; 'test' entries are
    tokenized and the result is asserted equal to the entry's ``tokens``.
    Each entry's work runs inside ``session().log(<text>)``.

    Returns:
        The ``KnowledgeBase`` instance after all entries have been processed.

    Raises:
        AssertionError: a 'test' entry tokenized differently than expected.
        Exception: an entry carries a case type other than 'example'/'test'.
    """
    knowledge = KnowledgeBase()
    case_count = len(EXAMPLES)

    for index, (case_type, example) in enumerate(EXAMPLES):
        text = example['text']
        show_progbar(index, case_count, text)

        # Reject unknown case types up front (after the progress update,
        # matching the order in which the checks have always run).
        if case_type not in ('example', 'test'):
            raise Exception('Not implemented case {}'.format(case_type))

        if case_type == 'example':
            with session().log(text):
                knowledge.train_tokenizer(example)
            continue

        # Remaining possibility: a 'test' entry.
        with session().log(text):
            tokens = list(knowledge.tokenize(text))
            assert example['tokens'] == tokens

    # Carriage return + ANSI "erase to end of line": wipes the current
    # terminal line (presumably the leftover progress bar).
    print("\r\x1b[K", end='')
    return knowledge
|