Save structural elements.
This commit is contained in:
parent
fc37450565
commit
40b63128af
@ -3,6 +3,7 @@ import logging
|
|||||||
|
|
||||||
from .session.org_mode import global_session as session
|
from .session.org_mode import global_session as session
|
||||||
|
|
||||||
|
from .atoms import Atom
|
||||||
from . import parsing
|
from . import parsing
|
||||||
from . import knowledge_evaluation
|
from . import knowledge_evaluation
|
||||||
from .modifiable_property import is_modifiable_property
|
from .modifiable_property import is_modifiable_property
|
||||||
@ -20,6 +21,7 @@ class KnowledgeBase(object):
|
|||||||
self.examples = copy.copy(examples)
|
self.examples = copy.copy(examples)
|
||||||
self.trained = copy.copy(trained)
|
self.trained = copy.copy(trained)
|
||||||
self.tokenization = set()
|
self.tokenization = set()
|
||||||
|
self.structural_elements = set()
|
||||||
|
|
||||||
def train_tokenizer(self, example):
|
def train_tokenizer(self, example):
|
||||||
with session().log('Train'):
|
with session().log('Train'):
|
||||||
@ -74,6 +76,14 @@ class KnowledgeBase(object):
|
|||||||
return parsing.pick_one_tokenization(options)
|
return parsing.pick_one_tokenization(options)
|
||||||
return options
|
return options
|
||||||
|
|
||||||
|
def add_tokenization(self, tokenization):
    """Store a tokenization and record the structural elements it contains.

    The tokenization is added to ``self.tokenization``. Every element of
    it that is not an ``Atom`` instance and is not yet present in
    ``self.structural_elements`` is annotated on the session log and then
    added to ``self.structural_elements``.

    ``tokenization`` is iterated and inserted into a set, so it has to be
    an iterable, hashable value.
    """
    # NOTE(review): indentation was lost in the view this was taken from;
    # the loop is assumed to run inside the log context -- confirm.
    with session().log('Added tokenization: “{}”'.format(tokenization)):
        self.tokenization.add(tokenization)

        for element in tokenization:
            # Atoms are not structural elements; skip them.
            if isinstance(element, Atom):
                continue

            # Already known, nothing new to report.
            if element in self.structural_elements:
                continue

            session().annotate(
                'Found new structural element “{}”'.format(element)
            )
            self.structural_elements.add(element)
|
||||||
|
|
||||||
def process(self, row):
|
def process(self, row):
|
||||||
knowledge_before = copy.deepcopy(self.knowledge)
|
knowledge_before = copy.deepcopy(self.knowledge)
|
||||||
with session().log("Process: {}".format(row)):
|
with session().log("Process: {}".format(row)):
|
||||||
|
@ -99,7 +99,7 @@ def integrate_token_to_text_matching(knowledge_base, text, tokens):
|
|||||||
elements.append(texts[i])
|
elements.append(texts[i])
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
knowledge_base.tokenization.add(tuple(elements))
|
knowledge_base.add_tokenization(tuple(elements))
|
||||||
|
|
||||||
def pick_one_tokenization(options):
|
def pick_one_tokenization(options):
|
||||||
'''
|
'''
|
||||||
|
Loading…
Reference in New Issue
Block a user