"""Tokenizer scenario: train a knowledge base on examples, then check it.

``EXAMPLES`` is an ordered list of ``(case_type, example)`` pairs.  Entries
tagged ``'example'`` are fed to ``KnowledgeBase.train_tokenizer``; entries
tagged ``'test'`` are tokenized with ``KnowledgeBase.tokenize`` and the result
is compared against the expected ``"tokens"`` list.
"""

from ..session.org_mode import global_session as session
from ..knowledge_base import KnowledgeBase
from ..utils.visuals import show_progbar
from ..visualization import show_knowledge


def _assert(args):
    """Assert that ``args`` is truthy."""
    assert(args)


def _assert_msg(args, msg):
    """Assert that ``args`` is truthy, reporting ``msg`` on failure."""
    assert args, msg


# Each entry is ``(case_type, example)``.  ``example`` always carries the raw
# ``"text"`` and the expected ``"tokens"``; some entries also carry a
# ``"meaning"`` mapping.
# NOTE(review): ``"meaning"`` is only ever passed along inside the whole
# example dict to ``train_tokenizer``; its exact semantics are defined by
# ``KnowledgeBase`` -- confirm there.
EXAMPLES = [
    ('example', {
        "text": 'cat',
        "tokens": ['cat'],
    }),
    ('example', {
        "text": 'cats',
        "tokens": ['cats'],
        "meaning": {'cats': ('add-modifier', 'cat', 'plural')},
    }),
    ('example', {
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    }),
    ('example', {
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    }),
    ('test', {
        "text": 'plane',
        "tokens": ['plane'],
    }),
    ('test', {
        "text": 'planes',
        "tokens": ['planes'],
        "meaning": {'planes': ('add-modifier', 'plane', 'plural')},
    }),
    ('test', {
        "text": 'some other text',
        "tokens": ['some', 'other', 'text'],
    }),
    ('test', {
        "text": 'is the sun a star?',
        "tokens": ['is', 'the', 'sun', 'a', 'star', '?'],
    }),
]


def main():
    """Run every entry of ``EXAMPLES`` against a fresh ``KnowledgeBase``.

    Entries are processed in list order, so all ``'example'`` entries that
    precede a ``'test'`` entry have already been trained on when it runs.

    Returns:
        The ``KnowledgeBase`` instance after all entries were processed.

    Raises:
        AssertionError: if a ``'test'`` entry tokenizes to something other
            than its expected ``"tokens"`` list.
        NotImplementedError: if an entry has a case type other than
            ``'example'`` or ``'test'``.
    """
    knowledge = KnowledgeBase()
    total = len(EXAMPLES)

    for i, (case_type, example) in enumerate(EXAMPLES):
        text = example['text']
        show_progbar(i, total, text)

        if case_type == 'example':
            with session().log(text):
                # The whole example dict (text, tokens and optional meaning)
                # is handed to the trainer, not just the text.
                knowledge.train_tokenizer(example)

        elif case_type == 'test':
            with session().log(text):
                # ``tokenize`` is materialised so it can be printed and
                # compared as a list.
                tokens = list(knowledge.tokenize(text))
                expected = example['tokens']

                print(tokens)
                print(expected)

                # Raised explicitly instead of using ``assert`` so the check
                # still runs under ``python -O`` and reports what differed.
                if expected != tokens:
                    raise AssertionError(
                        'Tokenization mismatch for {!r}: '
                        'expected {!r}, got {!r}'.format(
                            text, expected, tokens))

        else:
            raise NotImplementedError(
                'Not implemented case {}'.format(case_type))

    # Carriage return + ANSI "erase to end of line"; presumably wipes the
    # progress bar left on the current terminal line.
    print("\r\x1b[K", end='')
    return knowledge