Learn from inferred tokenizations.
This commit is contained in:
parent
6fb1e1e649
commit
d63781a0d2
@ -121,6 +121,7 @@ class KnowledgeBase(object):
|
|||||||
if return_one:
|
if return_one:
|
||||||
chosen = parsing.pick_one_tokenization(options, self)
|
chosen = parsing.pick_one_tokenization(options, self)
|
||||||
session().log("Chosen: “{}”".format(chosen))
|
session().log("Chosen: “{}”".format(chosen))
|
||||||
|
self.train_tokenizer({'text': row, 'tokens': chosen})
|
||||||
return chosen
|
return chosen
|
||||||
return options
|
return options
|
||||||
|
|
||||||
|
@ -34,11 +34,11 @@ EXAMPLES = [
|
|||||||
"text": 'plane',
|
"text": 'plane',
|
||||||
"tokens": ['plane'],
|
"tokens": ['plane'],
|
||||||
}),
|
}),
|
||||||
('test', {
|
# ('test', {
|
||||||
"text": 'planes',
|
# "text": 'planes',
|
||||||
"tokens": ['planes'],
|
# "tokens": ['planes'],
|
||||||
"meaning": { 'planes': ('add-modifier', 'plane', 'plural') },
|
# "meaning": { 'planes': ('add-modifier', 'plane', 'plural') },
|
||||||
}),
|
# }),
|
||||||
('test', {
|
('test', {
|
||||||
"text": 'some other text',
|
"text": 'some other text',
|
||||||
"tokens": ['some', 'other', 'text'],
|
"tokens": ['some', 'other', 'text'],
|
||||||
@ -46,6 +46,10 @@ EXAMPLES = [
|
|||||||
('test', {
|
('test', {
|
||||||
"text": 'is the sun a star?',
|
"text": 'is the sun a star?',
|
||||||
"tokens": ['is', 'the', 'sun', 'a', 'star', '?'],
|
"tokens": ['is', 'the', 'sun', 'a', 'star', '?'],
|
||||||
|
}),
|
||||||
|
('test', {
|
||||||
|
"text": 'sometextnotseparatedbyspaces',
|
||||||
|
"tokens": ['some', 'text', 'not', 'separated', 'by', 'spaces'],
|
||||||
})
|
})
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -66,7 +70,7 @@ def main():
|
|||||||
tokens = list(knowledge.tokenize(example['text']))
|
tokens = list(knowledge.tokenize(example['text']))
|
||||||
|
|
||||||
session().log('Expected “{}”, found “{}”'
|
session().log('Expected “{}”, found “{}”'
|
||||||
.format(tokens, example['tokens']))
|
.format(example['tokens'], tokens))
|
||||||
assert example['tokens'] == tokens
|
assert example['tokens'] == tokens
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -7,6 +7,11 @@ BASIC_TOKENIZATION_EXAMPLES = (
|
|||||||
"text": 'cat',
|
"text": 'cat',
|
||||||
"tokens": ['cat'],
|
"tokens": ['cat'],
|
||||||
}),
|
}),
|
||||||
|
({
|
||||||
|
"text": 'cats',
|
||||||
|
"tokens": ['cats'],
|
||||||
|
"meaning": { 'cats': ('add-modifier', 'cat', 'plural') },
|
||||||
|
}),
|
||||||
({
|
({
|
||||||
"text": 'text separated by spaces',
|
"text": 'text separated by spaces',
|
||||||
"tokens": ['text', 'separated', 'by', 'spaces'],
|
"tokens": ['text', 'separated', 'by', 'spaces'],
|
||||||
|
Loading…
Reference in New Issue
Block a user