Save structural elements.
This commit is contained in:
parent
fc37450565
commit
40b63128af
@ -3,6 +3,7 @@ import logging
|
||||
|
||||
from .session.org_mode import global_session as session
|
||||
|
||||
from .atoms import Atom
|
||||
from . import parsing
|
||||
from . import knowledge_evaluation
|
||||
from .modifiable_property import is_modifiable_property
|
||||
@ -20,6 +21,7 @@ class KnowledgeBase(object):
|
||||
self.examples = copy.copy(examples)
|
||||
self.trained = copy.copy(trained)
|
||||
self.tokenization = set()
|
||||
self.structural_elements = set()
|
||||
|
||||
def train_tokenizer(self, example):
|
||||
with session().log('Train'):
|
||||
@ -74,6 +76,14 @@ class KnowledgeBase(object):
|
||||
return parsing.pick_one_tokenization(options)
|
||||
return options
|
||||
|
||||
def add_tokenization(self, tokenization):
    """Store a tokenization and register the structural elements it contains.

    The tokenization is added to ``self.tokenization``. Every element of it
    that is not an ``Atom`` and is not yet in ``self.structural_elements``
    is annotated in the session log and then added to that set.
    """
    # NOTE(review): nesting was reconstructed from a flattened diff view; the
    # loop is assumed to run inside the logging context -- confirm.
    with session().log('Added tokenization: “{}”'.format(tokenization)):
        self.tokenization.add(tokenization)

        for element in tokenization:
            # Atoms and already-known elements need no further handling.
            if isinstance(element, Atom) or element in self.structural_elements:
                continue

            session().annotate('Found new structural element “{}”'.format(element))
            self.structural_elements.add(element)
|
||||
|
||||
def process(self, row):
|
||||
knowledge_before = copy.deepcopy(self.knowledge)
|
||||
with session().log("Process: {}".format(row)):
|
||||
|
@ -99,7 +99,7 @@ def integrate_token_to_text_matching(knowledge_base, text, tokens):
|
||||
elements.append(texts[i])
|
||||
i += 1
|
||||
|
||||
knowledge_base.tokenization.add(tuple(elements))
|
||||
knowledge_base.add_tokenization(tuple(elements))
|
||||
|
||||
def pick_one_tokenization(options):
|
||||
'''
|
||||
|
Loading…
Reference in New Issue
Block a user