Save structural elements.

This commit is contained in:
kenkeiras 2018-04-15 17:07:29 +02:00
parent fc37450565
commit 40b63128af
2 changed files with 11 additions and 1 deletions

View File

@ -3,6 +3,7 @@ import logging
from .session.org_mode import global_session as session from .session.org_mode import global_session as session
from .atoms import Atom
from . import parsing from . import parsing
from . import knowledge_evaluation from . import knowledge_evaluation
from .modifiable_property import is_modifiable_property from .modifiable_property import is_modifiable_property
@ -20,6 +21,7 @@ class KnowledgeBase(object):
self.examples = copy.copy(examples) self.examples = copy.copy(examples)
self.trained = copy.copy(trained) self.trained = copy.copy(trained)
self.tokenization = set() self.tokenization = set()
self.structural_elements = set()
def train_tokenizer(self, example): def train_tokenizer(self, example):
with session().log('Train'): with session().log('Train'):
@ -74,6 +76,14 @@ class KnowledgeBase(object):
return parsing.pick_one_tokenization(options) return parsing.pick_one_tokenization(options)
return options return options
def add_tokenization(self, tokenization):
    """Store a tokenization and register the structural elements it contains.

    The tokenization is added to ``self.tokenization``. Every element of
    it that is not an ``Atom`` instance and is not yet present in
    ``self.structural_elements`` is annotated in the session log and then
    added to that set.

    Args:
        tokenization: An iterable of elements. It is stored in a set, so
            it must itself be hashable (the visible caller passes a
            tuple), and so must each of its non-``Atom`` elements.
    """
    # The closing typographic quote was missing from both messages, which
    # left the logged value visually unterminated.
    with session().log('Added tokenization: “{}”'.format(tokenization)):
        self.tokenization.add(tokenization)

        for element in tokenization:
            # Skip Atom instances and elements that were already recorded.
            if isinstance(element, Atom) or element in self.structural_elements:
                continue

            session().annotate(
                'Found new structural element “{}”'.format(element))
            self.structural_elements.add(element)
def process(self, row): def process(self, row):
knowledge_before = copy.deepcopy(self.knowledge) knowledge_before = copy.deepcopy(self.knowledge)
with session().log("Process: {}".format(row)): with session().log("Process: {}".format(row)):

View File

@ -99,7 +99,7 @@ def integrate_token_to_text_matching(knowledge_base, text, tokens):
elements.append(texts[i]) elements.append(texts[i])
i += 1 i += 1
knowledge_base.tokenization.add(tuple(elements)) knowledge_base.add_tokenization(tuple(elements))
def pick_one_tokenization(options): def pick_one_tokenization(options):
''' '''