Compare commits
1 commit: naive-nlu ... naive-nlu

Author | SHA1 | Date
---|---|---
 | fe7b550cdb |

naive-nlu/.gitignore (vendored, 2 changes)
@@ -1,7 +1,5 @@
 *#*
 *~
-.vscode
 *.ba?k
 *.pyc
 __pycache__
-treeNLU-*session*.org

naive-nlu/tree_nlu/__main__.py (deleted)
@@ -1,4 +0,0 @@
-from tree_nlu import cli
-
-if __name__ == '__main__':
-    cli.main()

naive-nlu/requirements.txt
@@ -1 +1,2 @@
 jsondiff
+hy

naive-nlu/setup.py
@@ -11,5 +11,6 @@ setup(name='tree_nlu',
       include_package_data=True,
       install_requires = [
           'jsondiff',
+          'hy',
       ],
       zip_safe=False)

naive-nlu/tree_nlu/atoms.py (deleted)
@@ -1,23 +0,0 @@
-'''
-Analogous to erlang ones.
-
-"An atom is a literal, a constant with name."
-'''
-
-from collections import namedtuple
-
-Atom = namedtuple('Atom', field_names='name')
-
-def is_atom(element, name=None):
-    '''Check if an element is an atom with a specific name.'''
-    if not isinstance(element, Atom):
-        return False
-
-    if name is None:
-        return True
-
-    return element.name == name
-
-def a(name):
-    '''Build an atom with a given name.'''
-    return Atom(name)
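
atoms.py disappears entirely with this commit. A standalone rerun of the removed helper, to show why the tokenization code used Atom markers instead of plain strings (the sample values here are illustrative, not from the repo):

```python
from collections import namedtuple

# Same definition as the deleted atoms.py.
Atom = namedtuple('Atom', field_names='name')

def is_atom(element, name=None):
    '''Check if an element is an atom with a specific name.'''
    if not isinstance(element, Atom):
        return False
    if name is None:
        return True
    return element.name == name

token_marker = Atom('token')
print(is_atom(token_marker, 'token'))  # True: right type and name
print(is_atom('token', 'token'))       # False: a plain string never matches
```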

naive-nlu/tree_nlu/cli.py (deleted)
@@ -1,65 +0,0 @@
-import logging
-import datetime
-from .session.org_mode import (
-    global_session as session,
-    create_global_session,
-)
-from .knowledge_base import KnowledgeBase
-from .visualization import (
-    show_knowledge,
-    show_samples,
-)
-from .tests import gac_100
-from .modifiable_property import (
-    ModifiableProperty,
-    ModifiablePropertyWithAst,
-    is_modifiable_property,
-)
-
-
-bye_phrases = ['bye', 'exit']
-
-
-def gen_session_name():
-    now = datetime.datetime.utcnow()
-    return "treeNLU-cli-session-{}.org".format(
-        now.strftime("%y_%m_%d %H:%M:%S_%f"))
-
-
-def main():
-    create_global_session(gen_session_name())
-    logging.getLogger().setLevel(logging.INFO)
-    knowledge = gac_100.main()
-    logging.getLogger().setLevel(logging.DEBUG)
-    while True:
-        try:
-            data = input("> ").strip()
-        except EOFError:
-            print("bye")
-            break
-        if data.lower() in bye_phrases:
-            break
-        if not data:
-            continue
-
-        if data == '/show':
-            show_knowledge(knowledge)
-            continue
-        elif data == '/samples':
-            show_samples(knowledge)
-            continue
-
-        with session().log(data):
-            ret = knowledge.process(data)
-            if ret:
-                result, _, _ = ret
-                if not is_modifiable_property(result):
-                    print("<", result)
-                else:
-                    result.setter()
-                    print("OK")
-            elif ret is None:
-                print("- Couldn't understand that, oops... -")
-            else:
-                print("Unhandled response:", ret)
-    print("< Bye!")

naive-nlu/tree_nlu/knowledge_base.py
@@ -1,65 +1,45 @@
 import copy
 
 import logging
 
-from .session.org_mode import global_session as session
+from . import parsing
 
-from .atoms import Atom
-from . import layered_model
 from . import knowledge_evaluation
 from .modifiable_property import is_modifiable_property
-import random
 
 def diff_knowledge(before, after):
     import jsondiff
     return jsondiff.diff(before, after)
 
 
-
 class KnowledgeBase(object):
-    def __init__(self, knowledge={}, examples=[], trained=[]):
+    def __init__(self, knowledge, examples=[], trained=[]):
         self.knowledge = copy.copy(knowledge)
-        self.originals = []
         self.examples = copy.copy(examples)
         self.trained = copy.copy(trained)
-        self.layers = layered_model.BaseModel(self)
 
-    ## Parsing
     def train(self, examples):
         knowledge_before = copy.deepcopy(self.knowledge)
-        with session().log('Train'):
-            # Parse everything
-            for example in examples:
-                # If there's parsed data, leverage it ASAP
-                if 'parsed' in example and isinstance(example['parsed'], tuple):
-                    with session().log('parsed information integration'):
-                        result = knowledge_evaluation.integrate_information(self.knowledge, {
-                            "parsed": example['parsed'],
-                        })
-                        self.act_upon(result)
-
-                with session().log("language integration"):
-                    for tokens, decomposition, inferred_tree in self.layers.integrate(self, example):
-                        session().annotate("Tokens: {}".format(tokens))
-                        session().annotate("Inferred tree: {}".format(inferred_tree))
-
-                with session().log("full information integration"):
-                    tokens = self.layers.tokenization.tokenize(example['text'], return_one=True)
-                    result = knowledge_evaluation.integrate_information(self.knowledge, {
-                        "elements": tokens,
-                        "decomposition": decomposition,
-                        "parsed": inferred_tree,
-                    })
-
-                    session().annotate("Result: {}".format(self.get_value(result)))
-                    self.act_upon(result)
-                    session().annotate("Set: {}".format(self.get_value(result)))
-                    self.examples.append((decomposition, inferred_tree))
-                    self.originals.append(example['text'])
-
-            # Reduce values
-            with session().log("reprocessing"):
-                res = self.layers.reprocess(self.examples)
-                self.trained = res
+        # Parse everything
+        parsed_examples = []
+        for example in examples:
+            logging.info("\x1b[7;32m> {} \x1b[0m".format(example))
+            tokens, decomposition, inferred_tree = parsing.integrate_language(self, example)
+            logging.info(tokens)
+
+            result = knowledge_evaluation.integrate_information(self.knowledge, {
+                "elements": tokens,
+                "decomposition": decomposition,
+                "parsed": inferred_tree,
+            })
+
+            logging.info("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result)))
+            self.act_upon(result)
+            logging.info("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result)))
+            self.examples.append((decomposition, inferred_tree))
+
+        # Reduce values
+        self.trained = parsing.reprocess_language_knowledge(self, self.examples)
 
         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@@ -67,21 +47,18 @@ class KnowledgeBase(object):
 
         return knowledge_diff_getter
 
 
     def process(self, row):
         knowledge_before = copy.deepcopy(self.knowledge)
-        with session().log("Process: {}".format(row)):
-            fit = list(self.layers.process(self, row))
-            if len(fit) == 0:
-                return None
-
-            tokens, inferred_tree = fit[0]
-            result = knowledge_evaluation.integrate_information(self.knowledge,
-                                                                {
-                                                                    "elements": tokens,
-                                                                    "parsed": inferred_tree,
-                                                                })
-            self.act_upon(result)
-            session().annotate("Result: {}".format(result))
+        logging.info("\x1b[7;32m> {} \x1b[0m".format(row))
+        tokens = parsing.to_tokens(row)
+        tokens, inferred_tree = parsing.get_fit(self, tokens)
+
+        result = knowledge_evaluation.integrate_information(self.knowledge,
+                                                            {
+                                                                "elements": tokens,
+                                                                "parsed": inferred_tree,
+                                                            })
+        self.act_upon(result)
 
         knowledge_after = copy.deepcopy(self.knowledge)
         knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
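
On both sides of this change, train() and process() snapshot the knowledge dict before and after the update and expose the difference lazily through a knowledge_diff_getter lambda built on diff_knowledge(). A minimal sketch of that helper in isolation; jsondiff comes from requirements.txt, and the two snapshots are invented sample data:

```python
import jsondiff

def diff_knowledge(before, after):
    # Same helper as in knowledge_base.py above.
    return jsondiff.diff(before, after)

before = {'icecream': {'groups': ['food']}}
after = {'icecream': {'groups': ['food', 'cold']}}
print(diff_knowledge(before, after))  # structural diff between the snapshots
```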

naive-nlu/tree_nlu/knowledge_evaluation.py
@@ -1,5 +1,3 @@
-from .session.org_mode import global_session as session
-
 from .modifiable_property import (
     ModifiableProperty,
     ModifiablePropertyWithAst,
@@ -11,7 +9,6 @@ def resolve(knowledge_base, elements, value):
     if isinstance(value, int):
         return elements[value]
     elif isinstance(value, tuple) or isinstance(value, list):
-        session().annotate("V: {} {}".format(value, elements))
         return integrate_information(knowledge_base, {
             "elements": elements,
             "parsed": value,
@@ -44,42 +41,16 @@ def get_subquery_type(knowledge_base, atom):
 
 
 def property_for_value(knowledge_base, value):
-    if value in knowledge_base:
-        # Annotate the property as property
-        groups = knowledge_base[value].get('groups', {'property'})
-        groups.add('property')
-        knowledge_base[value]['groups'] = groups
-
-        # And find the property "name"
-        if 'as_property' in knowledge_base[value]:
-            return knowledge_base[value]['as_property']
-
-        return knowledge_base[value].get('groups', {'property'})
-    else:
-        # Consider that any property is... a property
-        knowledge_base[value] = {'groups': {'property'}}
-        return {'property'}
+    return knowledge_base[value]['as_property']
 
 
 def modifiable_property_from_property(prop, path, value):
     def getter():
         nonlocal prop, path, value
-        if isinstance(path, set):
-            # If the property is from a set, it's true if any possible
-            # path has a element as true
-            return any(map(lambda possible_path: ((possible_path in prop)
-                                                  and
-                                                  (prop[possible_path] == value)),
-                           path))
-        else:
-            return (path in prop) and prop[path] == value
+        return (path in prop) and prop[path] == value
 
     def setter():
         nonlocal prop, path, value
-        if isinstance(path, set):
-            for possible_path in path:
-                prop[possible_path] = value
-        else:
-            prop[path] = value
+        prop[path] = value
 
     return ModifiableProperty(
@@ -103,31 +74,12 @@ def exists_property_with_value(knowledge_base, elements, subj, value):
 
 
 def modifiable_element_for_existance_in_set(container, set_name, element):
-    session().annotate("-----({} {} {})".format(container, set_name, element))
-
     def getter():
         nonlocal container, set_name, element
-        session().annotate(" get({} {} {})".format(container, set_name, element))
         return (set_name in container) and (element in container[set_name])
 
     def setter():
         nonlocal container, set_name, element
-        session().annotate(" add({} {} {})".format(container, set_name, element))
-        return container[set_name].add(element)
-
-    return ModifiableProperty(
-        getter=getter,
-        setter=setter,
-    )
-
-
-def modifiable_element_for_existance_in_group(container, element, backlink, set_name='groups'):
-    def getter():
-        nonlocal container, element, backlink, set_name
-        return (set_name in container) and (element in container[set_name])
-
-    def setter():
-        nonlocal container, set_name, element
-        backlink['groups'].add(set_name)
         return container[set_name].add(element)
 
     return ModifiableProperty(
@@ -140,23 +92,18 @@ def pertenence_to_group(knowledge_base, elements, subj, group):
     group = resolve(knowledge_base, elements, group)
 
     if subj not in knowledge_base:
-        knowledge_base[subj] = {'groups': set()}
+        knowledge_base[subj] = {}
 
     if "groups" not in knowledge_base[subj]:
         knowledge_base[subj]["groups"] = set()
 
-    if group not in knowledge_base:
-        knowledge_base[group] = {'groups': set()}
-
-    if "groups" not in knowledge_base[group]:
-        knowledge_base[group]["groups"] = set()
-
-    return modifiable_element_for_existance_in_group(
+    return modifiable_element_for_existance_in_set(
         container=knowledge_base[subj],
-        element=group,
-        backlink=knowledge_base[group],
+        set_name="groups",
+        element=group
     )
 
 
 def has_capacity(knowledge_base, elements, subj, capacity):
     subj = resolve(knowledge_base, elements, subj)
     capacity = resolve(knowledge_base, elements, capacity)
@@ -181,70 +128,12 @@ def question(knowledge_base, elements, subj):
         return subj.getter()
     return subj
 
 
-def implies(knowledge_base, elements, precedent, consequent):
-    precedent = resolve(knowledge_base, elements, precedent)
-    consequent = resolve(knowledge_base, elements, consequent)
-
-    if precedent not in knowledge_base:
-        knowledge_base[precedent] = {'groups': set()}
-
-    if "implications" not in knowledge_base[precedent]:
-        knowledge_base[precedent]["implications"] = set()
-
-    return modifiable_element_for_existance_in_set(
-        container=knowledge_base[precedent],
-        set_name="implications",
-        element=consequent
-    )
-
-
-def property_has_value(knowledge_base, elements, subj, prop, value):
-    subj = resolve(knowledge_base, elements, subj)
-    prop = resolve(knowledge_base, elements, prop)
-    value = resolve(knowledge_base, elements, value)
-
-    if subj not in knowledge_base:
-        knowledge_base[subj] = {'groups': set()}
-
-    if prop not in knowledge_base[subj]:
-        knowledge_base[subj][prop] = set()
-
-    return modifiable_element_for_existance_in_set(
-        container=knowledge_base[subj],
-        set_name=prop,
-        element=value
-    )
-
-
-def perform_verb_over_object(knowledge_base, elements, subj, verb, obj):
-    subj = resolve(knowledge_base, elements, subj)
-    verb = resolve(knowledge_base, elements, verb)
-    obj = resolve(knowledge_base, elements, obj)
-    session().annotate("({} {} {})".format(verb, subj, obj))
-
-    if subj not in knowledge_base:
-        knowledge_base[subj] = {'groups': set()}
-
-    if 'performs-over' not in knowledge_base[subj]:
-        knowledge_base[subj]['performs-over'] = {}
-
-    if verb not in knowledge_base[subj]['performs-over']:
-        knowledge_base[subj]['performs-over'][verb] = set()
-
-    return modifiable_element_for_existance_in_set(
-        container=knowledge_base[subj]['performs-over'],
-        set_name=verb,
-        element=obj
-    )
-
-
 knowledge_ingestion = {
     "exists-property-with-value": exists_property_with_value,
     "pertenence-to-group": pertenence_to_group,
     "has-capacity": has_capacity,
     "question": question,
-    "implies": implies,
-    "property-has-value": property_has_value,
-    "perform-verb-over-object": perform_verb_over_object,
 }
 
 
@@ -263,29 +152,6 @@ def integrate_information(knowledge_base, example):
     args = ast[1:]
     elements = example.get('elements', None)
 
-    session().annotate("Integrating:")
-    session().annotate("AST: {}".format(ast))
-    session().annotate("ARG: {}".format(elements))
-    session().annotate("------------")
-
     return tagged_with_ast(
         ast, elements,
         knowledge_ingestion[method](knowledge_base, elements, *args))
-
-def can_be_used_in_place(knowledge, token, minisegment):
-    if token not in knowledge.knowledge:
-        return True
-
-    info = knowledge.knowledge[token]
-    info_groups = info.get('groups', set())
-    minisegment_groups = minisegment.get('groups', set())
-
-    # Common group
-    if len(info_groups & minisegment_groups) > 0:
-        return True
-
-    # Neither has a group
-    elif len(info_groups) == 0 == len(minisegment_groups):
-        return True
-
-    return False
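
After the removals above, modifiable_element_for_existance_in_set no longer touches session(), so it can be exercised on its own. A sketch that assumes ModifiableProperty is a plain getter/setter record (its actual definition lives in modifiable_property.py, which this diff does not show):

```python
from collections import namedtuple

# Assumed stand-in for tree_nlu.modifiable_property.ModifiableProperty.
ModifiableProperty = namedtuple('ModifiableProperty', ['getter', 'setter'])

def modifiable_element_for_existance_in_set(container, set_name, element):
    # Post-commit body, with the session() annotations stripped.
    def getter():
        return (set_name in container) and (element in container[set_name])

    def setter():
        return container[set_name].add(element)

    return ModifiableProperty(getter=getter, setter=setter)

entry = {'groups': set()}
prop = modifiable_element_for_existance_in_set(entry, 'groups', 'cold')
print(prop.getter())  # False: 'cold' is not in the set yet
prop.setter()
print(prop.getter())  # True
```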

naive-nlu/tree_nlu/layered_model.py (deleted)
@@ -1,49 +0,0 @@
-from .layers import tokenization_layer
-from .layers import parsing_layer
-from .layers import parsing
-from .session.org_mode import global_session as session
-
-
-def make_yield_pipe(layers, knowledge_base, example, func):
-    if len(layers) < 1:
-        yield example
-        return
-
-    input_generator = make_yield_pipe(layers[:-1], knowledge_base, example, func)
-    for input in input_generator:
-        session().annotate("[{}] --> {}".format(len(layers), input))
-        for d in list(func(layers[-1], input)):
-            yield d
-
-
-class BaseModel:
-    def __init__(self, knowledge_base):
-        self.tokenization = tokenization_layer.TokenizationLayer(knowledge_base)
-        self.parsing = parsing_layer.ParsingLayer()
-
-        self.layers = [
-            self.tokenization,
-            self.parsing,
-        ]
-
-    def reprocess(self, examples):
-        pattern_examples = []
-        for i, sample in enumerate(examples):
-            other = examples[:i] + examples[i + 1:]
-            match = parsing.get_matching(sample, other)
-            if len(match) > 0:
-                sample = (match, sample[1],)
-            pattern_examples.append(sample)
-
-        return pattern_examples
-
-    def integrate(self, knowledge_base, example):
-        yield from make_yield_pipe(self.layers, knowledge_base,
-                                   example, lambda l, i: l.integrate(knowledge_base, i))
-
-    def process(self, knowledge_base, example):
-        yield from make_yield_pipe(self.layers, knowledge_base,
-                                   example, lambda l, i: l.process(knowledge_base, i))
-
-    def tokenize(self, row, return_one=True):
-        return self.tokenization.to_tokens(row)
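
make_yield_pipe chained the layers recursively: everything the earlier layers yield is fed, value by value, into the last layer. A toy re-implementation with the session() logging removed, just to show the composition order:

```python
def yield_pipe(layers, example, func):
    # Same recursion as the deleted make_yield_pipe, minus session logging.
    if len(layers) < 1:
        yield example
        return

    for value in yield_pipe(layers[:-1], example, func):
        for out in func(layers[-1], value):
            yield out

# Two dummy "layers" that merely wrap their input:
pipe = yield_pipe(['tok', 'parse'], 'x', lambda layer, v: ['{}({})'.format(layer, v)])
print(list(pipe))  # -> ['parse(tok(x))']
```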

naive-nlu/tree_nlu/layers/parsing.py (deleted)
@@ -1,500 +0,0 @@
-#!/usr/bin/env python
-
-from ..session.org_mode import global_session as session
-import re
-import copy
-
-from functools import reduce
-from typing import List, Dict
-from ..modifiable_property import ModifiableProperty
-from .. import parameters
-from ..atoms import Atom, a, is_atom
-from .. import knowledge_evaluation
-
-def make_template(knowledge_base, tokens, parsed):
-    matcher = list(tokens)
-    template = list(parsed)
-    session().annotate(" -- MK TEMPLATE --")
-    session().annotate("MATCHR: {}".format(matcher))
-    session().annotate("TEMPLT: {}".format(template))
-    for i in range(len(matcher)):
-        word = matcher[i]
-        if word in template:
-            template[template.index(word)] = i
-        matcher[i] = {
-            'groups': set(knowledge_base.knowledge.get(word, {}).get('groups', set())),
-        }
-    return tokens, matcher, template
-
-
-def is_bottom_level(tree):
-    for element in tree:
-        if isinstance(element, list) or isinstance(element, tuple):
-            return False
-    return True
-
-
-def get_lower_levels(parsed):
-    lower = []
-    def aux(subtree, path):
-        nonlocal lower
-        deeper = len(path) == 0
-        for i, element in enumerate(subtree):
-            if isinstance(element, list) or isinstance(element, tuple):
-                aux(element, path + (i,))
-                deeper = True
-
-        if not deeper:
-            lower.append((path, subtree))
-
-    aux(parsed, path=())
-    return lower
-
-
-# TODO: probably optimize this, it creates lots of unnecessary tuples
-def replace_position(tree, position, new_element):
-    session().annotate("REPLACE POSITIONS:")
-    session().annotate("  TREE  : {}".format(tree))
-    session().annotate("POSITION: {}".format(position))
-    session().annotate("NEW ELEM: {}".format(new_element))
-    session().annotate("------------------")
-
-    def aux(current_tree, remaining_route):
-        if len(remaining_route) == 0:
-            return new_element
-
-        else:
-            step = remaining_route[0]
-            return (
-                tree[:step]
-                + (aux(tree[step], remaining_route[1:]),)
-                + tree[step + 2:]
-            )
-
-    result = aux(tree, position)
-    session().annotate("-RESULT: {}".format(result))
-    return result
-
-
-def integrate_language(knowledge_base, example):
-    text = example["text"].lower()
-    parsed = example["parsed"]
-
-    tokens = example['tokens']
-    resolved_parsed = copy.deepcopy(parsed)
-
-    while True:
-        session().annotate("P: {}".format(resolved_parsed))
-        lower_levels = get_lower_levels(resolved_parsed)
-        session().annotate("Lower: {}".format(lower_levels))
-        if len(lower_levels) == 0:
-            break
-
-        for position, atom in lower_levels:
-            with session().log("Atom {}".format(atom)):
-                result = None
-                similars = get_similar_tree(knowledge_base, atom, tokens)
-                for similar in similars:
-                    result = build_remix_matrix(knowledge_base, tokens, atom, similar)
-                    if result is not None:
-                        break
-                else:
-                    raise Exception('Similar not found')
-
-                remix, (start_bounds, end_bounds) = result
-
-                after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
-                session().annotate("--FIND MIX--")
-                session().annotate("-MIX- | {}".format(remix))
-                session().annotate("-FRM- | {}".format(tokens))
-                session().annotate("-AFT- | {}".format(after_remix))
-
-                session().annotate("--- TEMPLATE ---")
-
-                _, matcher, result = make_template(knowledge_base, after_remix, atom)
-                session().annotate("Tx: {}".format(after_remix))
-                session().annotate("Mx: {}".format(matcher))
-                session().annotate("Rx: {}".format(result))
-                session().annotate("Sx: {}".format(start_bounds))
-                session().annotate("Ex: {}".format(end_bounds))
-
-
-                assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
-                session().annotate( "  +-> {}".format(after_remix))
-                subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
-                session().annotate(r"  \-> <{}>".format(subquery_type))
-
-                # Clean remaining tokens
-                new_tokens = list(tokens)
-                offset = len(start_bounds)
-                for _ in range(len(remix)):
-                    new_tokens.pop(offset)
-
-                # TODO: Get a specific types for... types
-                new_tokens.insert(offset, (subquery_type, remix))
-                tokens = new_tokens
-
-                resolved_parsed = replace_position(resolved_parsed, position, offset)
-                session().annotate("RP: {}".format(resolved_parsed))
-                session().annotate("AT: {}".format(atom))
-                session().annotate("#########")
-
-
-    tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
-    session().annotate("T: {}".format(tokens))
-    session().annotate("M: {}".format(matcher))
-    session().annotate("R: {}".format(result))
-    session().annotate("---")
-    yield tokens, matcher, result
-
-
-def apply_remix(tokens, remix):
-    rebuilt = []
-    for i in remix:
-        if isinstance(i, int):
-            if i >= len(tokens):
-                return None
-            rebuilt.append(tokens[i])
-        else:
-            assert(isinstance(i, str))
-            rebuilt.append(i)
-    return rebuilt
-
-
-def build_remix_matrix(knowledge_base, tokens, atom, similar):
-    tokens = list(tokens)
-    with session().log("Remix matrix for {} - {}".format(tokens, atom)):
-        tokens, matcher, result = make_template(knowledge_base, tokens, atom)
-        similar_matcher, similar_result, similar_result_resolved, _, _ = similar
-
-        start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)
-
-        for i, element in (end_bounds + start_bounds[::-1]):
-            matcher.pop(i)
-            tokens.pop(i)
-
-        possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
-        session().annotate("Possible remixes: {}".format(possible_remixes))
-        if len(possible_remixes) < 1:
-            return None
-
-        chosen_remix = possible_remixes[0]
-
-        return chosen_remix, (start_bounds, end_bounds)
-
-
-def get_possible_remixes(knowledge_base, matcher, similar_matcher):
-
-    matrix = []
-    with session().log("Possible remixes from matcher: {}".format(matcher)):
-        for element in matcher:
-            with session().log("Element `{}`".format(element)):
-                session().annotate("Similar `{}`".format(similar_matcher))
-                if element in similar_matcher or isinstance(element, dict):
-                    if isinstance(element, dict):
-                        indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
-                        session().annotate("Dict element matching: {}".format(indexes))
-                    else:
-                        indexes = all_indexes(similar_matcher, element)
-                        session().annotate("* element matching: {}".format(indexes))
-                    matrix.append(indexes)
-                else:
-                    session().annotate("`else` element matching: [element]")
-                    matrix.append([element])
-
-    # TODO: do some scoring to find the most "interesting combination"
-    return [list(x) for x in list(zip(*matrix))]
-
-
-def all_indexes(collection, element):
-    indexes = []
-    base = 0
-
-    for _ in range(collection.count(element)):
-        i = collection.index(element, base)
-        base = i + 1
-        indexes.append(i)
-
-    return indexes
-
-
-def all_matching_indexes(knowledge_base, collection, element):
-    indexes = []
-
-    with session().log('Matching “{}”'.format(element)):
-        assert("groups" in element)
-        element = element["groups"]
-        for i, instance in enumerate(collection):
-            session().log('Checking “{}”'.format(instance))
-
-            if isinstance(instance, dict):
-                instance = instance["groups"]
-            elif instance in knowledge_base.knowledge:
-                session().log('Knowledge about “{}”: ”{}”'.format(instance, knowledge_base.knowledge[instance]))
-
-                if "groups" not in knowledge_base.knowledge[instance]:
-                    # This means that is only known as token
-                    # so we should try to avoid using it
-                    continue
-
-                instance = knowledge_base.knowledge[instance]["groups"]
-
-            intersection = set(instance) & set(element)
-            if (len(intersection) > 0 or (0 == len(instance) == len(element))):
-                indexes.append((i, intersection))
-
-        return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
-
-
-def element_matches_groups(knowledge, element: Dict, groups):
-    with session().log("Checking if e “{}” matches groups “{}”".format(element, groups)):
-        if isinstance(groups, str) and groups in knowledge:
-            return len(knowledge[groups].get("groups", set()) & element['groups']) > 0
-        elif isinstance(groups, dict):
-            return len(element.get("groups", set()) & element['groups']) > 0
-        return False
-
-
-def find_bounds(knowledge, matcher, similar_matcher):
-    start_bounds = []
-    for i, element in enumerate(matcher):
-        if element in similar_matcher:
-            break
-        else:
-            start_bounds.append((i, element))
-
-    end_bounds = []
-    for i, element in enumerate(matcher[::-1]):
-        in_similar = False
-        if isinstance(element, str):
-            in_similar = element in similar_matcher
-        elif isinstance(element, dict):
-            in_similar = any(map(lambda groups: element_matches_groups(knowledge.knowledge,
-                                                                       element, groups),
-                                 similar_matcher))
-
-        if in_similar:
-            break
-        else:
-            end_bounds.append((len(matcher) - (i + 1), element))
-
-    return start_bounds, end_bounds
-
-
-def get_similar_tree(knowledge_base, atom, tokens):
-    possibilities = []
-
-    # Find matching possibilities
-    for entry, tree in knowledge_base.trained:
-        if not is_bottom_level(tree):
-            continue
-        if tree[0] == atom[0]:
-            possibilities.append((entry, tree))
-
-    # Sort by more matching elements
-    sorted_possibilities = []
-    for (raw, possibility) in possibilities:
-        resolved = []
-        for element in atom:
-            if isinstance(element, str):
-                resolved.append(element)
-            else:
-                resolved.append(knowledge_evaluation.resolve(
-                    knowledge_base.knowledge,
-                    element,
-                    raw))
-
-        # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element
-        atom_score = sum([resolved[i] == atom[i]
-                          for i
-                          in range(min(len(resolved),
-                                       len(atom)))])
-        token_score = sum([similar_token in tokens
-                           for similar_token
-                           in raw])
-
-        sorted_possibilities.append((raw, possibility, resolved, atom_score, token_score))
-
-    sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3] * 100 + p[4], reverse=True)
-    if len(sorted_possibilities) < 1:
-        return []
-
-    for i, possibility in enumerate(sorted_possibilities):
-        similar_matcher, similar_result, similar_result_resolved, _atom_score, _token_score = possibility
-        with session().log("Like {}".format(similar_matcher)):
-            session().annotate('AST: {}'.format(similar_result))
-            session().annotate('Results on: {}'.format(similar_result_resolved))
-            session().annotate('Atom score: {}'.format(_atom_score))
-            session().annotate('Token score: {}'.format(_token_score))
-
-    return sorted_possibilities
-
-
-# TODO: unroll this mess
-def get_matching(sample, other):
-    l = len(sample[0])
-    other = list(filter(lambda x: len(x[0]) == l, other))
-    for i in range(l):
-        if len(other) == 0:
-            return []
-
-        if isinstance(sample[0][i], dict):  # Dictionaries are compared by groups
-            other = list(filter(lambda x: isinstance(x[0][i], dict) and
-                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
-                                other))
-
-        elif isinstance(sample[0][i], tuple):  # Tuples are compared by types [0]
-            other = list(filter(lambda x: isinstance(x[0][i], tuple) and
-                                x[0][i][0] == sample[0][i][0],
-                                other))
-
-    matching = []
-    for x in range(l):  # Generate the combination of this and other(s) matcher
-        first_sample_data = sample[0][x]
-        if isinstance(first_sample_data, str):
-            matching.append(first_sample_data)
-        elif isinstance(first_sample_data, tuple):
-            matching.append(first_sample_data)
-        else:
-            this_groups = sample[0][x]['groups']
-            if len(other) > 0:
-                other_groups = reduce(lambda a, b: a & b,
-                                      map(lambda y: y[0][x]['groups'],
-                                          other))
-                this_groups = this_groups & other_groups
-
-            matching.append({'groups': this_groups})
-    return matching
-
-
-def reverse_remix(tree_section, remix):
-    result_section = []
-    offset = 0
-    for origin in remix:
-        if isinstance(origin, int):
-            if (origin + offset) >= len(tree_section):
-                return None
-
-            result_section.append(copy.deepcopy(tree_section[origin + offset]))
-        else:
-            assert(isinstance(origin, str))
-            offset += 1
-    return result_section + tree_section[len(remix):]
-
-
-def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
-    results = []
-    for matcher, ast in knowledge.trained:
-        with session().log("{} <- {}".format(matcher, tokens)):
-            result = match_fit(knowledge, tokens, matcher, ast,
-                               remaining_recursions)
-
-            if result is not None:
-                with session().log("Result: {}".format(result)):
-                    results.append(result)
-
-    if len(results) > 0:
-        return results[0]
-
-
-def is_definite_minisegment(minisegment):
-    return isinstance(minisegment, str) or isinstance(minisegment, dict)
-
-
-def match_token(knowledge, next_token, minisegment):
-    if isinstance(minisegment, dict):
-        return knowledge_evaluation.can_be_used_in_place(knowledge, next_token, minisegment)
-    elif isinstance(minisegment, str):
-        # TODO: check if the two elements can be used in each other place
-        return next_token == minisegment
-
-    return False
-
-
-def resolve_fit(knowledge, fit, remaining_recursions):
-    fitted = []
-    for element in fit:
-        if is_definite_minisegment(element):
-            fitted.append(element)
-        else:
-            with session().log("Resolving fit of `{}`".format(element)):
-                ((result_type, remixer), tokens) = element
-                remixed_tokens = reverse_remix(tokens, remixer)
-                if remixed_tokens is None:
-                    return None
-
-                minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
-                if minifit is None:
-                    return None
-
-                minitokens, miniast = minifit
-                session().annotate("    AST | {}".format(miniast))
-                subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
-                fitted.append(subproperty)
-
-    return fitted
-
-
-def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
-    segment_possibilities = [([], tokens)]  # Matched tokens, remaining tokens
-    indent = ' ' * (parameters.MAX_RECURSIONS - remaining_recursions)
-    session().annotate(indent + 'T> {}'.format(tokens))
-    session().annotate(indent + 'M> {}'.format(matcher))
-    for minisegment in matcher:
-        with session().log("Minisegment `{}`".format(minisegment)):
-            possibilities_after_round = []
-            for matched_tokens, remaining_tokens in segment_possibilities:
-                if len(remaining_tokens) < 1:
-                    continue
-
-                session().annotate(indent + "RT {}".format(remaining_tokens[0]))
-                session().annotate(indent + "DEF {}".format(is_definite_minisegment(minisegment)))
-                if is_definite_minisegment(minisegment):
-                    # What if not match -----<
-                    if match_token(knowledge, remaining_tokens[0], minisegment):
-                        possibilities_after_round.append((
-                            matched_tokens + [remaining_tokens[0]],
-                            remaining_tokens[1:]
-                        ))
-                else:
-                    # What if not match!!!!!!-----<
-                    # TODO: optimize this with a look ahead
-                    for i in range(1, len(tokens)):
-                        possibilities_after_round.append((
-                            matched_tokens + [(minisegment, remaining_tokens[:i])],
-                            remaining_tokens[i:]
-                        ))
-                session().annotate(indent + "## PA {}".format(possibilities_after_round))
-            else:
-                segment_possibilities = possibilities_after_round
-                for possibility in segment_possibilities:
-                    with session().log("Possibility: `{}`".format(possibility)):
-                        pass
-                if len(segment_possibilities) < 1:
-                    with session().log("NO POSSIBLE"):
-                        pass
-
-    fully_matched_segments = [(matched, remaining)
-                              for (matched, remaining)
-                              in segment_possibilities
-                              if len(remaining) == 0]
-
-    resolved_fits = []
-    with session().log("Full matches"):
-        for fit, _ in fully_matched_segments:
-            with session().log(fit):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
-                pass
-
-    with session().log("Resolutions"):
-        for fit, _ in fully_matched_segments:
-            with session().log("Resolving {}".format(fit)):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
-                resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
-                if resolved_fit is not None:
-                    resolved_fits.append(resolved_fit)
-                else:
-                    session().annotate("Not resolved")
-
-    if len(resolved_fits) == 0:
-        return None
-
-    return resolved_fits[0], ast

naive-nlu/tree_nlu/layers/parsing_layer.py (deleted)
@@ -1,16 +0,0 @@
-from . import parsing
-
-class ParsingLayer:
-    def __init__(self):
-        pass
-
-    def integrate(self, knowledge_base, example):
-        yield from parsing.integrate_language(knowledge_base, example)
-
-    def train(self, knowledge_base, example):
-        assert False
-
-    def process(self, knowledge_base, input):
-        fit = parsing.get_fit(knowledge_base, input)
-        if fit is not None:
-            yield fit

naive-nlu/tree_nlu/layers/tokenization.py (deleted)
@@ -1,186 +0,0 @@
-from ..session.org_mode import global_session as session
-from ..atoms import Atom, a, is_atom
-
-def lookahead_for_tokens_or_strucutral_elements(knowledge_base, remaining):
-    for se in knowledge_base.structural_elements:
-        found_position = remaining.find(se)
-        found = found_position >= 0
-        session().annotate('Looking for structure with “{}”, found? {}'.format(se, found))
-        if found:
-            return [
-                (remaining[:found_position], se, remaining[found_position + len(se):])
-            ]
-
-    for token in knowledge_base.knowledge.keys():
-        found_position = remaining.find(token)
-        found = found_position >= 0
-        session().annotate('Looking for token “{}”, found? {}'.format(token, found))
-        if found:
-            return [
-                (remaining[:found_position], token, remaining[found_position + len(token):])
-            ]
-
-    return None
-
-
-
-def to_tokens(knowledge_base, text, precedent=None):
-    if len(text) == 0:
-        session().annotate("No text remaining")
-        yield ['']
-        return
-
-    with session().log("Tokenizing {}".format(text)):
-        for option in knowledge_base.expected_token_after_precedent(precedent):
-            with session().log("Next: “{}”".format(option)):
-                with session().log("Matching “{}” on “{}”".format(option, text)):
-                    for token_match in tokenization_match(option, text, knowledge_base):
-                        if token_match is None:
-                            session().annotate("No match")
-
-                        match, remaining = token_match
-                        if len(remaining) == len(text):
-                            raise Exception('No text consumed in match')
-
-                        session().annotate('Match: “{}”'.format(match))
-                        with session().log('Remaining “{}”'.format(remaining)):
-                            for sublevel in to_tokens(knowledge_base, remaining, match):
-                                candidate = list(filter(lambda x: x != '', [match] + sublevel))
-                                session().annotate('Yielding candidate “{}”'.format(candidate))
-                                yield candidate
-
-
-def tokenization_match(element, text, knowledge_base):
-    # Constant/structural string matching
-    if isinstance(element, str):
-        if text.find(element) == 0:
-            # This match comes from a structuring element
-            # It doesn't appear on the tokenization
-            # So we should return it as an empty string
-            yield ('', text[len(element):])
-            return
-        else:
-            # No match found
-            return
-
-    elif is_atom(element, 'token'):
-        yield from match_single_token(text, knowledge_base)
-        return
-    raise NotImplementedError()
-
-
-def match_single_token(text, knowledge_base):
-    found_token = False
-    for token in knowledge_base.knowledge.keys():
-        if text.find(token) == 0:
-            yield token, text[len(token):]
-            found_token = True
-
-    if found_token:
-        return
-
-    session().annotate('No token found at the start of ”{}”'.format(text))
-    session().annotate('using structural elements to infer it')
-    # TODO: review this when multiple structural elements are available
-    for se in knowledge_base.structural_elements:
-        session().annotate('Looking for se “{}” in “{}”'.format(se, text))
-        position = text.find(se, 0)
-        found = position > 0  # 0 is not considered a valid position for this kind of split
-        if found:
-            session().annotate('Found ”{}”, inferring “{}”'.format(se, text[:position]))
-            yield text[:position], text[position:]
-
-    session().annotate('No structural element or token found, inferring only token remaining')
-    yield text, ''
-
-    # Using other tokens for cutoff
-    for token in knowledge_base.knowledge.keys():
-        session().annotate('Looking for token “{}” in “{}”'.format(token, text))
-        position = text.find(token)
-        found = position >= 0
-        if found:
-            session().annotate('Found ”{}”, in position ”{}”'.format(token, position))
-            yield text[:position], text[position:]
-
-
-def integrate_tokenization(knowledge_base, example):
-    text = example['text']
-    tokens = example['tokens']
-    meaning = example.get('meaning')
-
-    return integrate_token_to_text_matching(knowledge_base, text, tokens)
-
-
-def integrate_token_to_text_matching(knowledge_base, text, tokens):
-    texts = [text]
-
-    # Convert to tokens
-    for token_id, token in enumerate(tokens):
-        # Look for token in texts
-        for i, text in enumerate(texts):
-            if isinstance(text, int):
-                continue
-
-            if token in text:
-                before, after = text.split(token, maxsplit=1)
-                texts = (texts[:i] + [before]
-                         + [a('token')]
-                         + [after] + texts[i + 1:])
-                break
-        else:
-            raise Exception('Token not found')
-
-    # Remove leftovers from splits
-    texts = list(filter(lambda x: x != '', texts))
-    session().log("Tokenized as {} over {}".format(texts, tokens))
-
-    for i, element in enumerate(texts[:-1]):
-        learn_token_pair(element, texts[i + 1], knowledge_base)
-
-    return tokens
-
-def learn_token_pair(precedent, consequent, knowledge_base):
-    knowledge_base.add_token_pair(precedent, consequent)
-
-
-def pick_one_tokenization(options, knowledge_base):
-    '''
-    Heuristic function to pick the most probable tokenization.
-
-    Just pick the one with more results.
-    '''
-    options = list(options)
-    with session().log("Picking among: {} options".format(len(options))):
-        session().log("Options: \n{}".format('\n'.join(map(str, options))))
-        return pick_by_score(options,
-                             [
-                                 # By number of splits without structuring elements
-                                 lambda tokenization: sum(map(
-                                     lambda split: sum(map(
-                                         lambda se: se in split, knowledge_base.structural_elements
-                                     )), tokenization)),
-
-                                 # By number of unknown tokens
-                                 lambda tokenization: len(list(filter(lambda token:
-                                                                      (token not in knowledge_base.knowledge.keys()) and
-                                                                      (token not in knowledge_base.structural_elements),
-                                                                      tokenization))),
-
-                                 # By number of splits
-                                 lambda tokenization: -len(tokenization),
-                             ])
-
-def pick_by_score(options, heuristics):
-    for heuristic in heuristics:
-        assert(len(options) > 0)
-        options = list(map(lambda opt: (heuristic(opt), opt), options))
-        sorted_options = sorted(options, key=lambda x: x[0], reverse=False)
-
-        heuristic_cutoff = sorted_options[0][0]
-        session().annotate(sorted_options)
-        pass_heuristic = [opt for (score, opt) in sorted_options if score <= heuristic_cutoff]
-        options = pass_heuristic
-
-    session().log("{} finalists: \n{}".format(len(options), '\n'.join(map(str, options))))
-    return options[0]
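
pick_by_score applies the heuristics in order; each round keeps only the options tied for the lowest score, and the first survivor wins. The same filtering logic with the session() calls removed so it runs standalone:

```python
def pick_by_score(options, heuristics):
    for heuristic in heuristics:
        assert len(options) > 0
        scored = sorted(((heuristic(opt), opt) for opt in options), key=lambda x: x[0])
        cutoff = scored[0][0]
        options = [opt for (score, opt) in scored if score <= cutoff]
    return options[0]

# Mirrors the last heuristic of pick_one_tokenization: -len favours more splits.
print(pick_by_score([['ab'], ['a', 'b']], [lambda tokenization: -len(tokenization)]))
# -> ['a', 'b']
```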

naive-nlu/tree_nlu/layers/tokenization_layer.py (deleted)
@@ -1,90 +0,0 @@
-from ..session.org_mode import global_session as session
-from ..atoms import Atom
-from . import tokenization
-import random
-import copy
-
-def randomized_weighted_list(elements):
-    # Randomized
-    randomized = list(elements)
-    random.shuffle(randomized)
-
-    # And return only once
-    already_returned = set()
-    for e in randomized:
-        if e in already_returned:
-            continue
-
-        yield e
-        already_returned.add(e)
-
-class TokenizationLayer:
-    def __init__(self, knowledge_base):
-        self.structural_elements = set()
-        self.token_chains = {}
-        self.tokens = set()
-        self.knowledge_base = knowledge_base
-        self.knowledge = knowledge_base.knowledge
-
-    def integrate(self, knowledge_base, data):
-        assert knowledge_base is self.knowledge_base
-
-        assert 'text' in data
-        tokens = self.tokenize(data['text'])
-        data_with_row = copy.copy(data)
-        data_with_row['tokens'] = tokens
-        yield data_with_row
-
-        # with session().log("Tokenize: {}".format(data['text'])):
-        #     for tokens in tokenization.to_tokens(self, data['text']):
-        #         data_with_row = copy.copy(data)
-        #         data_with_row['tokens'] = tokens
-        #         yield data_with_row
-
-    def process(self, knowledge_base, row):
-        yield self.tokenize(row)
-
-
-    def tokenize(self, row, return_one=True):
-        row = row.lower()
-        with session().log("Tokenize: {}".format(row)):
-            options = list(tokenization.to_tokens(self, row))
-            session().log("Results:\n{}".format('\n'.join(map(str, options))))
-
-            if return_one:
-                chosen = tokenization.pick_one_tokenization(options, self)
-                session().log("Chosen: “{}”".format(chosen))
-                self.train({'text': row, 'tokens': chosen})
-                return chosen
-            return options
-
-    ## Tokenization
-    def add_token_pair(self, precedent, consequent):
-        self.add_token(precedent)
-        self.add_token(consequent)
-
-        if precedent not in self.token_chains:
-            self.token_chains[precedent] = []
-        self.token_chains[precedent].append(consequent)
-
-    def add_token(self, token):
-        self.tokens.add(token)
-        if (not isinstance(token, Atom)) and (token not in self.structural_elements):
-            session().annotate('Found new structural element “{}”'.format(token))
-            self.structural_elements.add(token)
-
-    def expected_token_after_precedent(self, precedent=None):
-        if precedent not in self.token_chains:  # If there's no known precedent, just return all tokens
-            return randomized_weighted_list(self.tokens)
-
-        return randomized_weighted_list(self.token_chains[precedent])
-
-    def train(self, example):
-        with session().log('Training tokenizer'):
-            session().annotate("Example: {}".format(example))
-            tokens = tokenization.integrate_tokenization(self, example)
-
-            # Integrate knowledge of concept
-            for token in tokens:
-                if not token in self.knowledge:
-                    self.knowledge[token] = {}

naive-nlu/tree_nlu/parsing.py (new file, 384 lines)
@@ -0,0 +1,384 @@
+#!/usr/bin/env python
+
+from . import knowledge_evaluation
+
+from . import depth_meter
+import logging
+import re
+import copy
+
+from functools import reduce
+from typing import List
+from .modifiable_property import ModifiableProperty
+from . import parameters
+
+# TODO: more flexible tokenization
+def to_tokens(text):
+    return re.findall(r'(\w+|[^\s])', text)
+
+
+def make_template(knowledge_base, tokens, parsed):
+    matcher = list(tokens)
+    template = list(parsed)
+    for i in range(len(matcher)):
+        word = matcher[i]
+        if word in template:
+            template[template.index(word)] = i
+            matcher[i] = {
+                'groups': set(knowledge_base.knowledge[word]['groups'])
+            }
+    return tokens, matcher, template
+
+
+def is_bottom_level(tree):
+    for element in tree:
+        if isinstance(element, list) or isinstance(element, tuple):
+            return False
+    return True
+
+
+def get_lower_levels(parsed):
+    lower = []
+    def aux(subtree, path):
+        nonlocal lower
+        deeper = len(path) == 0
+        for i, element in enumerate(subtree):
+            if isinstance(element, list) or isinstance(element, tuple):
+                aux(element, path + (i,))
+                deeper = True
+
+        if not deeper:
+            lower.append((path, subtree))
+
+    aux(parsed, path=())
+    return lower
+
+
+# TODO: probably optimize this, it creates lots of unnecessary tuples
+def replace_position(tree, position, new_element):
+
+    def aux(current_tree, remaining_route):
+        if len(remaining_route) == 0:
+            return new_element
+
+        else:
+            step = remaining_route[0]
+            return (
+                tree[:step]
+                + (aux(tree[step], remaining_route[1:]),)
+                + tree[step + 2:]
+            )
+
+    return aux(tree, position)
+
+
+def integrate_language(knowledge_base, example):
+    text = example["text"].lower()
+    parsed = example["parsed"]
+
+    resolved_parsed = copy.deepcopy(parsed)
+    tokens = to_tokens(text)
+
+    while True:
+        logging.debug("P: {}".format(resolved_parsed))
+        lower_levels = get_lower_levels(resolved_parsed)
+        logging.debug("Lower: {}".format(lower_levels))
+        if len(lower_levels) == 0:
+            break
+
+        for position, atom in lower_levels:
+            logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom))
+            similar = get_similar_tree(knowledge_base, atom)
+            remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
+            _, matcher, result = make_template(knowledge_base, tokens, atom)
+            logging.debug("Tx: {}".format(tokens))
+            logging.debug("Mx: {}".format(matcher))
+            logging.debug("Rx: {}".format(result))
+            logging.debug("Remix: {}".format(remix))
+
+            after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
+            assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
+            logging.debug( "  +-> {}".format(after_remix))
+            subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
+            logging.debug(r"  \-> <{}>".format(subquery_type))
+
+            # Clean remaining tokens
+            new_tokens = list(tokens)
+            offset = len(start_bounds)
+            for _ in range(len(remix)):
+                new_tokens.pop(offset)
+
+            # TODO: Get a specific types for... types
+            new_tokens.insert(offset, (subquery_type, remix))
+            tokens = new_tokens
+
+            resolved_parsed = replace_position(resolved_parsed, position, offset)
+            logging.debug("#########")
+
+
+    tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
+    logging.debug("T: {}".format(tokens))
+    logging.debug("M: {}".format(matcher))
+    logging.debug("R: {}".format(result))
+    logging.debug("---")
+    return tokens, matcher, result
+
+
+def apply_remix(tokens, remix):
+    rebuilt = []
+    for i in remix:
+        rebuilt.append(tokens[i])
+    return rebuilt
+
+
+def build_remix_matrix(knowledge_base, tokens, atom, similar):
+    tokens = list(tokens)
+    tokens, matcher, result = make_template(knowledge_base, tokens, atom)
+    similar_matcher, similar_result, similar_result_resolved, _ = similar
+
+    start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
+
+    for i, element in (end_bounds + start_bounds[::-1]):
+        matcher.pop(i)
+        tokens.pop(i)
+
+    possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
+    chosen_remix = possible_remixes[0]
+
+    return chosen_remix, (start_bounds, end_bounds)
+
+
+def get_possible_remixes(knowledge_base, matcher, similar_matcher):
+
+    matrix = []
+    for element in matcher:
+        logging.debug("- {}".format(element))
+        logging.debug("+ {}".format(similar_matcher))
+        assert(element in similar_matcher or isinstance(element, dict))
+
+        if isinstance(element, dict):
+            indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
+        else:
+            indexes = all_indexes(similar_matcher, element)
+        matrix.append(indexes)
+
+    # TODO: do some scoring to find the most "interesting combination"
+    return [list(x) for x in list(zip(*matrix))]
+
+
+def all_indexes(collection, element):
+    indexes = []
+    base = 0
+
+    for _ in range(collection.count(element)):
+        i = collection.index(element, base)
+        base = i + 1
+        indexes.append(i)
+
+    return indexes
+
+
+def all_matching_indexes(knowledge_base, collection, element):
+    indexes = []
+
+    assert("groups" in element)
+    element = element["groups"]
+    for i, instance in enumerate(collection):
+        if isinstance(instance, dict):
+            instance = instance["groups"]
+        elif instance in knowledge_base.knowledge:
+            instance = knowledge_base.knowledge[instance]["groups"]
+
+        intersection = set(instance) & set(element)
+        if len(intersection) > 0:
+            indexes.append((i, intersection))
+
+    return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
+
+
+def find_bounds(matcher, similar_matcher):
|
start_bounds = []
|
||||||
|
for i, element in enumerate(matcher):
|
||||||
|
if element in similar_matcher:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
start_bounds.append((i, element))
|
||||||
|
|
||||||
|
end_bounds = []
|
||||||
|
for i, element in enumerate(matcher[::-1]):
|
||||||
|
if element in similar_matcher:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
end_bounds.append((len(matcher) - (i + 1), element))
|
||||||
|
|
||||||
|
return start_bounds, end_bounds
|
||||||
|
|
||||||
|
|
||||||
|
def get_similar_tree(knowledge_base, atom):
|
||||||
|
possibilities = []
|
||||||
|
|
||||||
|
# Find matching possibilities
|
||||||
|
for entry, tree in knowledge_base.trained:
|
||||||
|
if not is_bottom_level(tree):
|
||||||
|
continue
|
||||||
|
if tree[0] == atom[0]:
|
||||||
|
possibilities.append((entry, tree))
|
||||||
|
|
||||||
|
# Sort by more matching elements
|
||||||
|
sorted_possibilities = []
|
||||||
|
for (raw, possibility) in possibilities:
|
||||||
|
resolved = []
|
||||||
|
for element in atom:
|
||||||
|
if isinstance(element, str):
|
||||||
|
resolved.append(element)
|
||||||
|
else:
|
||||||
|
resolved.append(knowledge_evaluation.resolve(
|
||||||
|
knowledge_base.knowledge,
|
||||||
|
element,
|
||||||
|
raw))
|
||||||
|
|
||||||
|
# TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element
|
||||||
|
score = sum([resolved[i] == atom[i]
|
||||||
|
for i
|
||||||
|
in range(min(len(resolved),
|
||||||
|
len(atom)))])
|
||||||
|
sorted_possibilities.append((raw, possibility, resolved, score))
|
||||||
|
sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True)
|
||||||
|
if len(sorted_possibilities) < 1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return sorted_possibilities[0]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: unroll this mess
|
||||||
|
def get_matching(sample, other):
|
||||||
|
l = len(sample[0])
|
||||||
|
other = list(filter(lambda x: len(x[0]) == l, other))
|
||||||
|
for i in range(l):
|
||||||
|
if len(other) == 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
if isinstance(sample[0][i], dict): # Dictionaries are compared by groups
|
||||||
|
other = list(filter(lambda x: isinstance(x[0][i], dict) and
|
||||||
|
len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
|
||||||
|
other))
|
||||||
|
|
||||||
|
elif isinstance(sample[0][i], tuple): # Tuples are compared by types [0]
|
||||||
|
other = list(filter(lambda x: isinstance(x[0][i], tuple) and
|
||||||
|
x[0][i][0] == sample[0][i][0],
|
||||||
|
other))
|
||||||
|
|
||||||
|
return [sample[0][x] if isinstance(sample[0][x], str)
|
||||||
|
else
|
||||||
|
sample[0][x] if isinstance(sample[0][x], tuple)
|
||||||
|
else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
|
||||||
|
map(lambda y: y[0][x]['groups'],
|
||||||
|
other))}
|
||||||
|
for x
|
||||||
|
in range(l)]
|
||||||
|
|
||||||
|
|
||||||
|
def reprocess_language_knowledge(knowledge_base, examples):
|
||||||
|
examples = knowledge_base.examples + examples
|
||||||
|
|
||||||
|
pattern_examples = []
|
||||||
|
for i, sample in enumerate(examples):
|
||||||
|
other = examples[:i] + examples[i + 1:]
|
||||||
|
match = get_matching(sample, other)
|
||||||
|
if len(match) > 0:
|
||||||
|
sample = (match, sample[1],)
|
||||||
|
pattern_examples.append(sample)
|
||||||
|
|
||||||
|
return pattern_examples
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_remix(tree_section, remix):
|
||||||
|
result_section = []
|
||||||
|
for origin in remix:
|
||||||
|
result_section.append(copy.deepcopy(tree_section[origin]))
|
||||||
|
return result_section + tree_section[len(remix):]
|
||||||
|
|
||||||
|
|
||||||
|
def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
|
||||||
|
for matcher, ast in knowledge.trained:
|
||||||
|
result = match_fit(knowledge, tokens, matcher, ast,
|
||||||
|
remaining_recursions)
|
||||||
|
if result is not None:
|
||||||
|
return result
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def is_definite_minisegment(minisegment):
|
||||||
|
return isinstance(minisegment, str) or isinstance(minisegment, dict)
|
||||||
|
|
||||||
|
|
||||||
|
def match_token(knowledge, next_token, minisegment):
|
||||||
|
if isinstance(minisegment, dict):
|
||||||
|
# TODO: check if the dictionary matches the values
|
||||||
|
return True
|
||||||
|
elif isinstance(minisegment, str):
|
||||||
|
# TODO: check if the two elements can be used in each other place
|
||||||
|
return next_token == minisegment
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_fit(knowledge, fit, remaining_recursions):
|
||||||
|
fitted = []
|
||||||
|
for element in fit:
|
||||||
|
if is_definite_minisegment(element):
|
||||||
|
fitted.append(element)
|
||||||
|
else:
|
||||||
|
((result_type, remixer), tokens) = element
|
||||||
|
remixed_tokens = reverse_remix(tokens, remixer)
|
||||||
|
minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
|
||||||
|
if minifit is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
minitokens, miniast = minifit
|
||||||
|
subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
|
||||||
|
fitted.append(subproperty)
|
||||||
|
|
||||||
|
return fitted
|
||||||
|
|
||||||
|
|
||||||
|
def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
|
||||||
|
segment_possibilities = [([], tokens)] # Matched tokens, remaining tokens
|
||||||
|
for minisegment in matcher:
|
||||||
|
possibilities_after_round = []
|
||||||
|
for matched_tokens, remaining_tokens in segment_possibilities:
|
||||||
|
if len(remaining_tokens) < 1:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if is_definite_minisegment(minisegment):
|
||||||
|
if match_token(knowledge, remaining_tokens[0], minisegment):
|
||||||
|
possibilities_after_round.append((
|
||||||
|
matched_tokens + [remaining_tokens[0]],
|
||||||
|
remaining_tokens[1:]
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# TODO: optimize this with a look ahead
|
||||||
|
for i in range(1, len(tokens)):
|
||||||
|
possibilities_after_round.append((
|
||||||
|
matched_tokens + [(minisegment, remaining_tokens[:i])],
|
||||||
|
remaining_tokens[i:]
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
segment_possibilities = possibilities_after_round
|
||||||
|
|
||||||
|
fully_matched_segments = [(matched, remaining)
|
||||||
|
for (matched, remaining)
|
||||||
|
in segment_possibilities
|
||||||
|
if len(remaining) == 0]
|
||||||
|
|
||||||
|
resolved_fits = []
|
||||||
|
for fit, _ in fully_matched_segments:
|
||||||
|
resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
|
||||||
|
if resolved_fit is not None:
|
||||||
|
resolved_fits.append(resolved_fit)
|
||||||
|
|
||||||
|
if len(resolved_fits) == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return resolved_fits[0], ast
|
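To make the tokenizer and remix helpers above concrete, here is a minimal sketch (the two functions are the ones defined in this module; the sample inputs are made up):

```python
import re

def to_tokens(text):
    # \w+ matches whole words; [^\s] turns every other non-space
    # character (such as '?') into its own token.
    return re.findall(r'(\w+|[^\s])', text)

def apply_remix(tokens, remix):
    # Pick tokens by the positions listed in `remix`.
    return [tokens[i] for i in remix]

print(to_tokens('is earth a planet?'))
# -> ['is', 'earth', 'a', 'planet', '?']

print(apply_remix(['earth', 'is', 'a', 'planet'], [1, 0, 2, 3]))
# -> ['is', 'earth', 'a', 'planet']
```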
@ -1,79 +0,0 @@
import logging
import datetime

SESSION = None


def __gen_session_name__():
    now = datetime.datetime.utcnow()
    return "treeNLU-session-{}.org".format(
        now.strftime("%y_%m_%d %H:%M:%S_%f"))


def create_global_session(fname):
    global SESSION
    SESSION = OrgModeSession(fname)


def global_session():
    if SESSION is None:
        session_name = __gen_session_name__()
        logging.warn("Session not created, saved on {}".format(session_name))
        create_global_session(session_name)

    assert(SESSION is not None)
    return SESSION


def get_header():
    now = datetime.datetime.utcnow()
    return ("# Ran on {}\n".format(
        now.strftime("%y/%m/%d %H:%M:%S.%f")))


class LevelContext:
    def __init__(self, increaser, decreaser):
        self.increaser = increaser
        self.decreaser = decreaser

    def __enter__(self):
        self.increaser()

    def __exit__(self, _type, _value, _traceback):
        self.decreaser()


class OrgModeSession:
    def __init__(self, fname):
        self.f = open(fname, 'wt')
        self.level = 0
        self.dirty = False

        self.f.write(get_header())

    def annotate(self, annotation):
        if self.dirty:
            self.f.write("{indentation} {data}\n".format(
                indentation='*' * (self.level + 1),
                data="---"))
            self.dirty = False

        self.f.write("{indentation} {data}\n".format(
            indentation=' ' * (self.level + 2 + 1),
            data=annotation))

    def log(self, string):
        self.f.write("{indentation} {data}\n".format(
            indentation='*' * (self.level + 1),
            data=string))
        self.dirty = False

        return LevelContext(self.inc_level, self.dec_level)

    def inc_level(self):
        self.level += 1

    def dec_level(self):
        self.level -= 1
        self.dirty = True

    def close(self):
        self.f.close()
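For reference, a minimal usage sketch of the session logger deleted above (the file name here is made up): `log` writes an org-mode heading and returns a context manager that nests deeper headings, while `annotate` writes an indented body line under the current heading.

```python
session = OrgModeSession('example-session.org')

with session.log('is lava dangerous?'):       # writes "* is lava dangerous?"
    session.annotate('Expected: True')        # indented under the heading
    with session.log('resolving subquery'):   # writes "** resolving subquery"
        session.annotate('done')
session.close()
```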
@ -1,50 +1,61 @@
Removed:

import traceback
import logging
from .session import org_mode
from .tests import tokenization
from .tests import basic
from .tests import gac_100
from .tests import gac_extension

logging.getLogger().setLevel(logging.ERROR)

tests = (
    ("tokenization", tokenization),
    ("basic", basic),
    ("gac 100", gac_100),
    ("gac+", gac_extension),
)


def gen_session_name():
    return "treeNLU-test-session.org"


def main():
    org_mode.create_global_session(gen_session_name())
    failed = False
    for test_name, test_module in tests:
        try:
            with org_mode.global_session().log(test_name):
                test_module.main()
            print(" \x1b[1;32m✓\x1b[0m {}".format(test_name))
        except AssertionError as ae:
            print(" \x1b[1;31m✗\x1b[0m {}{}".format(test_name,
                  ('\n  [Assertion] {}'.format(ae.args[0])) if len(ae.args) > 0
                  else ''))
            traceback.print_exc()
            failed = True

        except Exception as e:
            print(" \x1b[1;7;31m!\x1b[0m {}\n  [Exception] {}".format(test_name, e))
            failed = True
            traceback.print_exc()
            raise
    org_mode.global_session().close()

    if failed:
        exit(1)

if __name__ == '__main__':
    main()

Added:

import json
import logging

logging.getLogger().setLevel(logging.INFO)

from .knowledge_base import KnowledgeBase
from .modifiable_property import is_modifiable_property

import hy
from .tests import base


def test_assumption(expectedResponse, knowledge, query):
    logging.info("Query: {}".format(query['text']))
    logging.info("Expected: {}".format(expectedResponse))

    result, abstract_tree, diff = knowledge.process(query['text'])
    end_result = result.getter() if is_modifiable_property(result) else result

    logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))
    assert(end_result == expectedResponse)


def main():
    base.run_tests()
    knowledge = KnowledgeBase(
        knowledge=base_knowledge,
    )

    differences = knowledge.train(examples)

    logging.info("----")
    logging.info(differences())
    logging.info("----")

    test_assumption(True, knowledge, {'text': 'earth is a planet'})
    test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
        row = test['text']
        result, inferred_tree, differences = knowledge.process(row)

        logging.info("result:", result)
        logging.info(differences())
        logging.info("---")
    logging.info('-----')
    logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
    logging.info('-----')

    queryTrue = {
        "text": "is io a moon?",
        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
    }
    queryFalse = {
        "text": "is io a planet?",
        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
    }

    test_assumption(False, knowledge, queryFalse)
    test_assumption(True, knowledge, queryTrue)

if __name__ == '__main__':
    main()
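Note that the `import hy` in the added runner is load-bearing: importing `hy` registers an import hook, after which `.hy` sources such as `tests/base.hy` (added below) can be imported like normal Python modules. A minimal sketch, assuming the `tree_nlu` package name from `setup.py`:

```python
import hy  # side effect: registers the .hy import hook
from tree_nlu.tests import base  # resolves tree_nlu/tests/base.hy

base.run_tests()
```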
naive-nlu/tree_nlu/tests/base.hy (new file, 62 lines)
@ -0,0 +1,62 @@
(import [..knowledge_base [KnowledgeBase]])

(setv knowledge-base
  {
    "icecream" { "groups" (set ["noun" "object" "comestible" "sweet"]) }

    "lava" { "groups" (set ["noun" "object"]) }
    "earth" { "groups" (set ["noun" "object" "planet"]) }
    "io" { "groups" (set ["noun" "object"]) }
    "green" { "groups" (set ["noun" "color" "concept"]) }
    "plane" { "groups" (set ["noun" "object" "vehicle" "fast"]) }
    "car" { "groups" (set ["noun" "object" "vehicle" "slow-ish"]) }
    "wale" { "groups" (set ["noun" "object" "living-being"]) }
    "cold" { "groups" (set ["property" "temperature"]) "as_property" "temperature" }
    "dangerous" { "groups" (set ["property"]) "as_property" "safety" }
    "planet" { "groups" (set ["noun" "group"]) }
    "moon" { "groups" (set ["noun" "group"]) }
    "color" { "groups" (set ["property" "group"]) }
    "fly" { "groups" (set ["verb"]) }
    "swim" { "groups" (set ["verb"]) }
  })

(setv examples
  [
    { "text" "icecream is cold"
      "parsed" '(exists-property-with-value icecream cold) }
    { "text" "is icecream cold?"
      "parsed" '(question (exists-property-with-value icecream cold)) }
    { "text" "lava is dangerous"
      "parsed" '(exists-property-with-value lava dangerous) }
    { "text" "is lava dangerous?"
      "parsed" '(question (exists-property-with-value lava dangerous)) }
    { "text" "earth is a planet"
      "parsed" '(pertenence-to-group earth planet) }
    { "text" "io is a moon"
      "parsed" '(pertenence-to-group io moon) }
    { "text" "is earth a moon?"
      "parsed" '(question (pertenence-to-group earth moon)) }
    { "text" "Green is a color"
      "parsed" '(pertenence-to-group green color) }
    { "text" "a plane can fly"
      "parsed" '(has-capacity plane fly) }
    { "text" "a wale can swim"
      "parsed" '(has-capacity wale swim) }
    { "text" "if earth is a planet it is big"
      "parsed" '(implies
                  (pertenence-to-group earth planet)
                  (exists-property-with-value earth big)) }
  ])

(defn run_tests []
  ;; The committed body used Python keyword syntax (knowledge=base_knowledge),
  ;; which is not valid Hy; passing the knowledge-base map defined above via
  ;; :knowledge appears to be what was intended.
  (setv knowledge (KnowledgeBase :knowledge knowledge-base)))
@ -1,166 +0,0 @@
from ..session.org_mode import global_session as session
import json

from ..knowledge_base import KnowledgeBase
from ..modifiable_property import is_modifiable_property
from ..utils.tokenization import train_basic_tokenization

examples = [
    {
        "text": "icecream is cold",
        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
    },
    {
        "text": "is icecream cold?",
        "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold'))
    },
    {
        "text": "lava is dangerous",
        "parsed": ("exists-property-with-value", 'lava', 'dangerous')
    },
    {
        "text": "is lava dangerous?",
        "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
    },
    {
        "text": "earth is a planet",
        "parsed": ("pertenence-to-group", 'earth', 'planet'),
    },
    {
        "text": "io is a moon",
        "parsed": ("pertenence-to-group", 'io', 'moon'),
    },
    {
        "text": "is earth a moon?",
        "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
    },
    {
        "text": "Green is a color",
        "parsed": ("pertenence-to-group", 'green', 'color'),
    },
    {
        "text": "a plane can fly",
        "parsed": ("has-capacity", 'plane', 'fly')
    },
    {
        "text": "a wale can swim",
        "parsed": ("has-capacity", 'wale', 'swim')
    },
    # {
    #     "text": "if earth is a planet, it is big",
    #     "parsed": ("implies",
    #                ("pertenence-to-group", 'earth', 'planet'),
    #                ("exists-property-with-value", 'earth', 'big')),
    # },
]

base_knowledge = {
    'icecream': {
        "groups": {'noun', 'object', 'comestible', 'sweet'},
    },
    'lava': {
        "groups": {'noun', 'object'},
    },
    'earth': {
        "groups": {'noun', 'object', 'planet'},
    },
    'io': {
        "groups": {'noun', 'object'},
    },
    'green': {
        "groups": {'noun', 'color', 'concept'},
    },
    'plane': {
        "groups": {'noun', 'object', 'vehicle', 'fast'},
    },
    'car': {
        "groups": {'noun', 'object', 'vehicle', 'slow-ish'},
    },
    'wale': {
        "groups": {'noun', 'object', 'living-being'},
    },
    'cold': {
        "groups": {'property', 'temperature'},
        "as_property": "temperature",
    },
    'dangerous': {
        "groups": {'property'},
        "as_property": "safety",
    },
    'planet': {
        "groups": {'noun', 'group'},
    },
    'moon': {
        "groups": {'noun', 'group'},
    },
    'color': {
        "groups": {'property', 'group'},
    },
    'fly': {
        "groups": {'verb'},
    },
    'bus': {
        "groups": {'noun'},
    },
    'run': {
        "groups": {'verb'},
    },
    'swim': {
        "groups": {'verb'},
    },
    # NOTE: duplicate key; this second 'planet' entry silently overrides
    # the one defined above.
    'planet': {
        'groups': {'noun'}
    }
}


def test_assumption(expectedResponse, knowledge, query):
    with session().log(query['text']):
        session().annotate("Expected: {}".format(expectedResponse))

        result, abstract_tree, diff = knowledge.process(query['text'])
        end_result = result.getter() if is_modifiable_property(result) else result

        session().annotate("Result: {}".format(end_result))
        if end_result != expectedResponse:
            raise AssertionError('{} is not {}'.format(end_result, expectedResponse))


def main():
    knowledge = KnowledgeBase(
        knowledge=base_knowledge,
    )

    train_basic_tokenization(knowledge)

    for example in examples:
        with session().log(example['text']):
            differences = knowledge.train([example])

            session().annotate("----")
            session().annotate(differences())
            session().annotate("----")

    test_assumption(True, knowledge, {'text': 'earth is a planet'})
    test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
        row = test['text']
        result, inferred_tree, differences = knowledge.process(row)

        session().annotate("result: {}".format(result))
        session().annotate(differences())
        session().annotate("---")
    session().annotate('-----')
    session().annotate(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
    session().annotate('-----')

    queryTrue = {
        "text": "is io a moon?",
        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
    }
    queryFalse = {
        "text": "is io a planet?",
        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
    }

    test_assumption(False, knowledge, queryFalse)
    test_assumption(True, knowledge, queryTrue)
    return knowledge
@ -1,736 +0,0 @@
from ..session.org_mode import global_session as session
from ..knowledge_base import KnowledgeBase
from ..utils.visuals import show_progbar
from ..visualization import show_knowledge
from ..utils.tokenization import train_basic_tokenization


def _assert(args):
    assert(args)

def _assert_msg(args, msg):
    assert args, msg

examples = [
    ('full_example',
     {
         "text": "is icecream cold?",
         "affirmation": "icecream is cold",
         "parsed": ("question",
                    ("exists-property-with-value", 'icecream', 'cold')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('cold' in knowledge.knowledge['icecream']['property'])
         ),],
     }),
    ('full_example',
     {
         "text": "is earth a planet?",
         "affirmation": "earth is a planet",
         "parsed": ("question",
                    ("pertenence-to-group", 'earth', 'planet')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('planet' in knowledge.knowledge['earth']['groups'])
         ),],
     }),
    ('full_example',
     {
         "text": "Is green a color?",
         "affirmation": "green is a color",
         "parsed": ("question",
                    ("pertenence-to-group", 'green', 'color')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('color' in knowledge.knowledge['green']['groups'])
         ),],
     }),
    ('full_example',
     {
         "text": "do airplanes fly?",
         "affirmation": "airplanes fly",
         "parsed": ("question",
                    ("has-capacity", 'plane', 'fly')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('fly' in knowledge.knowledge['plane']['capacities'])
         ),],
     }),
    ('full_example',
     {
         "text": "Is it hot during the summer?",
         "affirmation": "it is hot during summer",
         "parsed": ("question",
                    ("implies", 'summer', 'hot')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('hot' in knowledge.knowledge['summer']['implications'])
         ),],
     }),
    ('full_example',
     {
         "text": "is chile in south america ?",
         "affirmation": "chile is in south america",
         "parsed": ("question",
                    ("property-has-value", 'chile', 'location', 'south america')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('south america' in knowledge.knowledge['chile']['location'])
         ),],
     }),
    ('full_example',
     {
         "text": "Was Socrates a man?",
         "affirmation": "Socrates was a man",
         "parsed": ("question",
                    ("pertenence-to-group", 'socrates', 'man')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('man' in knowledge.knowledge['socrates']['groups'])
         ),],
     }),
    ('full_example',
     {
         "text": "Computers use electricity?",
         "affirmation": "Computers use electricity",
         "parsed": ("question",
                    ('perform-verb-over-object', 'computers', 'use', 'electricity')),
         "answer": True,
         "after_execution": [(
             lambda knowledge: _assert('electricity' in knowledge.knowledge['computers']['performs-over']['use'])
         ),],
     }),
    # ('full_example',
    #  {
    #      "text": "The dominant language in france is french?",
    #      "affirmation": "The dominant language in france is french",
    #      "parsed": ("question",
    #                 ("property-has-value", "france", "dominant-language", "french")),
    #      "answer": True,
    #  }),
    # Disabled placeholder ("affirmation" == "text", "parsed": (), "answer": None):
    #   "was abraham lincoln once president of the united states?"
    ('text_example',
     {
         "question": "is milk white?",
         "affirmation": "milk is white",
         "answer": True,
     }),
    # More disabled placeholders, all of the same shape
    # ({"text": <q>, "affirmation": <q>, "parsed": (), "answer": None}):
    #   "do people have emotions?"
    #   "do objects appear smaller as they move away from you?"
    #   "Does the human species have a male and female gender?"
    #   "Is a mountain mostly made of rock?"
    #   "is sun microsystems a computer company?"
    #   "Do you see with your eyes and smell with your nose?"
    #   "Is smoking bad for your health?"
    #   "Does a dog have four legs?"
    #   "Do mammals have hearts?"
    #   "is the Earth a planet?"
    # ('text_example',
    #  {
    #      "question": "is water a liquid?",
    #      "affirmation": "water is a liquid",
    #      "answer": True,
    #  }),
    # Remaining disabled placeholders (same shape as above):
    #   "Is Bugs Bunny a cartoon character?"
    #   "Do Humans communicate by Telephone?"
    #   "is beer a drink ?"
    #   "are there 12 months in a year?"
    #   "does the sun hurt your eyes when you look at it?"
    #   "Do most cars have doors?"
    #   "is orange both a fruit and a colour?"
    #   "Is water a necessity?"
    #   "Do CDs have better quality sound than Cassettes?"
    #   "do animals die?"
    #   "Is the arctic cold?"
    #   "Do people have 2 eyes?"
    #   "does a person have a brain?"
    #   "Is the rain wet?"
    #   "Is division a mathematical operation?"
    #   "is 400 greater than 399?"
    #   "is magenta a color?"
    #   "Are books educational?"
    #   "Was the Great Wall of China built by humans?"
    #   "Are pianos musical instruments?"
    #   "Has Bill Clinton been President of the United States?"
    #   "Is a whale a mammal?"
    #   "Are lemons yellow?"
    #   "Is the South Pole cold?"
    #   "Is Africa warm?"
    #   "Is Antarctica cold?"
    #   "Is rock is generally harder than wood?"
    #   "Do dogs chase cats?"
    #   "can humans die from cold temperatures?"
    #   "do people enjoy conversation?"
    #   "Is Bill Clinton the President of the United States?"
    #   "Are books a good source of information?"
    #   "are friends different than enemies?"
    #   "are people alive?"
    #   "Do triangles have 3 sides?"
    #   "Is Ice cream cold?"
    #   "Are all sides of a square the same length?"
    #   "Do all people eat food?"
    #   "do dentists repair teeth?"
    #   "Is America bigger than Japan?"
    #   "Do all triangles have three sides?"
    #   "A grocery store sales food?"
    #   "Does a sunburn cause pain?"
    #   "Is a computer an invention?"
    #   "have humans visited the moon?"
    #   "Are there people in India?"
    #   "Was Einstein a genius?"
    #   "Are we on the planet earth?"
    #   "do people comb their hair in the morning?"
    #   "Does it hurt to lose a friend?"
    #   "Are there people on the earth?"
    #   "Was George Washington a president of the United States of America?"
    #   "Does an ocean have salt water in it?"
    #   "Is night darker than day?"
    #   "Does a triangle have three sides?"
    #   "Are peaches fruit?"
    #   "Do people urinate?"
    #   "Is Germany located in Europe?"
    #   "Do mirrors reflect light?"
    #   "Are people born naked?"
    #   "Is it hot near the equator?"
    #   "is paper made from trees?"
    #   "Can a female have children?"
    #   "Are people born every day?"
    #   "Are shoes worn on the feet?"
    #   "does it get wet when it rains?"
    #   "Are there plants and insects in the rainforest which have no names?"
    #   "Do people eat pigs?"
    #   "Do businessmen wear ties?"
    #   "Is New York in the United States?"
    #   "Are humans more intelligent than ants?"
    #   "Are ravens black?"
    #   "Are there rats on ships?"
    #   "are lions animals?"
    #   "6 is greater than 5?"
    #   "Is water made of hydrogen and oxygen?"
    #   "is the sky blue on a clear day?"
    #   "Do most people work during the day?"
]

base_knowledge = {
    'summer': {
        "groups": {'epoch'},
    },
    'fly': {
        "groups": {'verb'},
    },
    'use': {
        "groups": {'verb'},
    },
    'electricity': {
        "groups": {'power'},
    },
    'airplanes': {},
    'white': {
        'groups': {'property'},
    }
}

def main():
    knowledge = KnowledgeBase(
        knowledge=base_knowledge,
    )

    train_basic_tokenization(knowledge)

    total = len(examples)

    for i, (example_type, data) in enumerate(examples):
        if example_type == 'full_example':
            affirmation = {
                'text': data['affirmation'],
                'parsed': data['parsed'][1],
            }
            question = data

            with session().log(data['affirmation']):
                show_progbar(i, total, data['affirmation'])
                differences = knowledge.train([affirmation])

            with session().log(data['text']):
                show_progbar(i, total, data['text'])
                differences = knowledge.train([question])
                session().annotate(differences())

                result, _, _ = knowledge.process(data['text'])

                if "after_execution" in data:
                    for f in data["after_execution"]:
                        f(knowledge)

                if result != data['answer']:
                    raise AssertionError('{} is not {}'.format(result, data['answer']))

        elif example_type == 'text_example':
            with session().log(data['affirmation']):
                show_progbar(i, total, data['affirmation'])
                affirmation = data['affirmation']
                session().annotate("Processing affirmation: {}".format(affirmation))
                _, _, _ = knowledge.process(affirmation)

            with session().log(data['question']):
                show_progbar(i, total, data['question'])
                question = data['question']
                session().annotate("Processing question : {}".format(question))
                result, _, _ = knowledge.process(question)

                if result != data['answer']:
                    raise AssertionError('{} is not {}'.format(result, data['answer']))

        else:
            raise NotImplementedError('Example type: {}'.format(example_type))

    print("\r\x1b[K", end='')
    return knowledge


if __name__ == '__main__':
    show_knowledge(main())
@ -1,26 +0,0 @@
from ..knowledge_base import KnowledgeBase
from ..session.org_mode import global_session as session

from . import gac_100


def ask_then_learn_test(knowledge: KnowledgeBase):
    with session().log("is icecream blue?"):
        ret, _, _ = knowledge.process("is icecream blue?")
        assert(ret is False)

    with session().log("icecream is blue"):
        ret, _, _ = knowledge.process("icecream is blue")

    with session().log("is icecream blue?"):
        ret, _, _ = knowledge.process("is icecream blue?")
        assert(ret is True)

    return knowledge


def main():
    knowledge = gac_100.main()

    knowledge.knowledge['blue'] = {'groups': {'property'}}
    knowledge = ask_then_learn_test(knowledge)
@ -1,80 +0,0 @@
from ..session.org_mode import global_session as session
from ..knowledge_base import KnowledgeBase
from ..utils.visuals import show_progbar
from ..visualization import show_knowledge


def _assert(args):
    assert(args)


def _assert_msg(args, msg):
    assert args, msg


EXAMPLES = [
    ('example', {
        "text": 'cat',
        "tokens": ['cat'],
    }),
    ('example', {
        "text": 'cats',
        "tokens": ['cats'],
        "meaning": { 'cats': ('add-modifier', 'cat', 'plural') },
    }),
    ('example', {
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    }),
    ('example', {
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    }),
    ('test', {
        "text": 'plane',
        "tokens": ['plane'],
    }),
    # ('test', {
    #     "text": 'planes',
    #     "tokens": ['planes'],
    #     "meaning": { 'planes': ('add-modifier', 'plane', 'plural') },
    # }),
    ('test', {
        "text": 'some other text',
        "tokens": ['some', 'other', 'text'],
    }),
    ('test', {
        "text": 'is the sun a star?',
        "tokens": ['is', 'the', 'sun', 'a', 'star', '?'],
    }),
    ('test', {
        "text": 'sometextnotseparatedbyspaces',
        "tokens": ['some', 'text', 'not', 'separated', 'by', 'spaces'],
    })
]


def main():
    knowledge = KnowledgeBase()

    total = len(EXAMPLES)

    for i, (case_type, example) in enumerate(EXAMPLES):
        show_progbar(i, total, example['text'])
        if case_type == 'example':
            with session().log(example['text']):
                knowledge.layers.tokenization.train(example)

        elif case_type == 'test':
            with session().log(example['text']):
                tokens = list(knowledge.layers.tokenization.tokenize(example['text']))

                session().log('Expected “{}”, found “{}”'
                              .format(example['tokens'], tokens))
                assert example['tokens'] == tokens

        else:
            raise Exception('Not implemented case {}'.format(case_type))

    print("\r\x1b[K", end='')
    return knowledge
@ -1,4 +0,0 @@
def dumper(obj):
    if isinstance(obj, set):
        return list(obj)
    return obj
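The four-line helper above is presumably meant as a `default` hook for `json.dumps`, since the knowledge dictionaries use `set` values that `json` cannot serialize on its own; a minimal sketch:

```python
import json

def dumper(obj):
    if isinstance(obj, set):
        return list(obj)  # sets are not JSON-serializable; lists are
    return obj

print(json.dumps({'icecream': {'groups': {'noun'}}}, default=dumper))
# -> {"icecream": {"groups": ["noun"]}}
```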
@ -1,29 +0,0 @@
from ..session.org_mode import (
    global_session as session,
)

BASIC_TOKENIZATION_EXAMPLES = (
    ({
        "text": 'cat',
        "tokens": ['cat'],
    }),
    ({
        "text": 'cats',
        "tokens": ['cats'],
        "meaning": { 'cats': ('add-modifier', 'cat', 'plural') },
    }),
    ({
        "text": 'text separated by spaces',
        "tokens": ['text', 'separated', 'by', 'spaces'],
    }),
    ({
        "text": 'is earth a planet?',
        "tokens": ['is', 'earth', 'a', 'planet', '?'],
    }),
)


def train_basic_tokenization(knowledge_base):
    with session().log('Training basic tokenization'):
        for example in BASIC_TOKENIZATION_EXAMPLES:
            knowledge_base.layers.tokenization.train(example)
@ -1,15 +0,0 @@
def show_progbar(done, total, msg=''):
    total_blocks = 10
    blocks_done = (done * total_blocks) // total
    blocks_to_go = total_blocks - blocks_done

    print('\r\x1b[K'            # Go to the start of the line
          '\x1b[0m'             # Restart the "style"
          '|'                   # Put the first "|"
          + blocks_done * '█'   # Completed blocks
          + blocks_to_go * ' '  # Uncompleted blocks
          + '\x1b[7m|\x1b[0m'   # End the bar
          + ' '
          + msg                 # Add message
          + '\r'                # Go back to the start
          , end='')
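A usage sketch for the progress bar above: with `total_blocks = 10`, `done=3` of `total=10` fills three blocks, and callers (as in the test files above) clear the line once the loop ends.

```python
items = ['cat', 'cats', 'plane']
total = len(items)

for i, item in enumerate(items):
    show_progbar(i, total, 'processing {}'.format(item))
    # ... real work happens here ...

print('\r\x1b[K', end='')  # wipe the bar, as the callers above do
```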
@ -1,8 +0,0 @@
def show_knowledge(knowledge):
    for key in knowledge.knowledge:
        print("\x1b[1m{}\x1b[0m {}".format(key, knowledge.knowledge[key]))


def show_samples(knowledge):
    for example in knowledge.originals:
        print("{}".format(example))