Merge branch 'layered-model' into naive-nlu

Properly handle solutions not found.
Pass test using layer structure.
2018-04-25 20:17:53 +02:00 · 2018-04-24 23:12:14 +02:00 · 2018-04-24 23:01:36 +02:00 · 2018-04-23 22:48:10 +02:00 · 2018-04-16 00:00:12 +02:00 · 2018-04-15 22:15:28 +02:00
22 changed files with 2318 additions and 574 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,7 @@
 *#*
 *~
+.vscode
 *.ba?k
 *.pyc
 __pycache__
+treeNLU-*session*.org
--- a/naive-nlu/cli.py
+++ b/naive-nlu/cli.py
@ -0,0 +1,4 @@
+from tree_nlu import cli
+
+# Start the interactive command-line interface only when this file is run as
+# a script, not when it is imported.
+if __name__ == '__main__':
+    cli.main()
--- a/naive-nlu/tree_nlu/atoms.py
+++ b/naive-nlu/tree_nlu/atoms.py
@ -0,0 +1,23 @@
+'''
+Analogous to Erlang atoms.
+
+"An atom is a literal, a constant with name."
+'''
+
+from collections import namedtuple
+
+# Record type with a single field, `name`, holding the atom's name.
+Atom = namedtuple('Atom', field_names='name')
+
+def is_atom(element, name=None):
+    '''
+    Tell whether `element` is an Atom, optionally one with a given name.
+
+    With `name` left as None any Atom is accepted; otherwise the atom's
+    name has to be equal to `name`. Anything that is not an Atom is rejected.
+    '''
+    if isinstance(element, Atom):
+        return name is None or element.name == name
+    return False
+
+def a(name):
+    '''Shorthand constructor: wrap `name` in an Atom.'''
+    return Atom(name=name)
--- a/naive-nlu/tree_nlu/cli.py
+++ b/naive-nlu/tree_nlu/cli.py
@ -0,0 +1,65 @@
+import logging
+import datetime
+from .session.org_mode import (
+    global_session as session,
+    create_global_session,
+)
+from .knowledge_base import KnowledgeBase
+from .visualization import (
+    show_knowledge,
+    show_samples,
+)
+from .tests import gac_100
+from .modifiable_property import (
+    ModifiableProperty,
+    ModifiablePropertyWithAst,
+    is_modifiable_property,
+)
+
+
+# Inputs that end the interactive loop; main() compares them against the
+# lowercased user input.
+bye_phrases = ['bye', 'exit']
+
+
+def gen_session_name():
+    '''
+    Build the file name used for the log of a new CLI session.
+
+    The name is "treeNLU-cli-session-<timestamp>.org", where the timestamp
+    is the current UTC date and time, down to microseconds.
+    '''
+    # datetime.utcnow() is deprecated since Python 3.12. An aware "now" in UTC
+    # renders to exactly the same text with the directives used below.
+    now = datetime.datetime.now(datetime.timezone.utc)
+    return "treeNLU-cli-session-{}.org".format(
+        now.strftime("%y_%m_%d %H:%M:%S_%f"))
+
+
+def main():
+    '''
+    Run the interactive prompt.
+
+    Creates the global session, obtains a knowledge object from
+    gac_100.main() and then reads lines from standard input until EOF or
+    one of `bye_phrases` is entered. "/show" and "/samples" display the
+    knowledge; any other non-empty line is handed to knowledge.process().
+    '''
+    create_global_session(gen_session_name())
+
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.INFO)
+    knowledge = gac_100.main()
+    root_logger.setLevel(logging.DEBUG)
+
+    # Slash commands and the function that serves each of them.
+    commands = {
+        '/show': show_knowledge,
+        '/samples': show_samples,
+    }
+
+    while True:
+        try:
+            data = input("> ").strip()
+        except EOFError:
+            print("bye")
+            break
+
+        if data.lower() in bye_phrases:
+            break
+        if not data:
+            continue
+
+        command = commands.get(data)
+        if command is not None:
+            command(knowledge)
+            continue
+
+        with session().log(data):
+            ret = knowledge.process(data)
+            if ret is None:
+                print("- Couldn't understand that, oops... -")
+            elif not ret:
+                print("Unhandled response:", ret)
+            else:
+                result, _, _ = ret
+                if is_modifiable_property(result):
+                    # A modifiable property is a statement: apply it.
+                    result.setter()
+                    print("OK")
+                else:
+                    print("<", result)
+    print("< Bye!")
--- a/naive-nlu/tree_nlu/knowledge_base.py
+++ b/naive-nlu/tree_nlu/knowledge_base.py
@ -1,45 +1,65 @@
 import copy
-
 import logging

-from . import parsing
+from .session.org_mode import global_session as session
+
+from .atoms import Atom
+from . import layered_model
 from . import knowledge_evaluation
 from .modifiable_property import is_modifiable_property
-
+import random

 def diff_knowledge(before, after):
    import jsondiff
    return jsondiff.diff(before, after)


+
 class KnowledgeBase(object):
-    def __init__(self, knowledge, examples=[], trained=[]):
+    def __init__(self, knowledge={}, examples=[], trained=[]):
        self.knowledge = copy.copy(knowledge)
+        self.originals = []
        self.examples = copy.copy(examples)
        self.trained = copy.copy(trained)
+        self.layers = layered_model.BaseModel(self)

+    ## Parsing
    def train(self, examples):
        knowledge_before = copy.deepcopy(self.knowledge)
-
+        with session().log('Train'):
            # Parse everything
-        parsed_examples = []
            for example in examples:
-            logging.info("\x1b[7;32m> {} \x1b[0m".format(example))
-            tokens, decomposition, inferred_tree = parsing.integrate_language(self, example)
-            logging.info(tokens)
+                # If there's parsed data, leverage it ASAP
+                if 'parsed' in example and isinstance(example['parsed'], tuple):
+                    with session().log('parsed information integration'):
+                        result = knowledge_evaluation.integrate_information(self.knowledge, {
+                            "parsed": example['parsed'],
+                        })
+                        self.act_upon(result)
+
+                with session().log("language integration"):
+                    for tokens, decomposition, inferred_tree in self.layers.integrate(self, example):
+                        session().annotate("Tokens: {}".format(tokens))
+                        session().annotate("Inferred tree: {}".format(inferred_tree))
+
+                with session().log("full information integration"):
+                    tokens = self.layers.tokenization.tokenize(example['text'], return_one=True)
                    result = knowledge_evaluation.integrate_information(self.knowledge, {
                        "elements": tokens,
                        "decomposition": decomposition,
                        "parsed": inferred_tree,
                    })

-            logging.info("\x1b[7;33m< {} \x1b[0m".format(self.get_value(result)))
+                    session().annotate("Result: {}".format(self.get_value(result)))
                    self.act_upon(result)
-            logging.info("\x1b[7;34m> set: {} \x1b[0m".format(self.get_value(result)))
+                    session().annotate("Set: {}".format(self.get_value(result)))
                    self.examples.append((decomposition, inferred_tree))
+                    self.originals.append(example['text'])

                # Reduce values
-            self.trained = parsing.reprocess_language_knowledge(self, self.examples)
+                with session().log("reprocessing"):
+                    res = self.layers.reprocess(self.examples)
+                    self.trained = res

            knowledge_after = copy.deepcopy(self.knowledge)
            knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
@ -47,18 +67,21 @@ class KnowledgeBase(object):

            return knowledge_diff_getter

-
    def process(self, row):
        knowledge_before = copy.deepcopy(self.knowledge)
-        logging.info("\x1b[7;32m> {} \x1b[0m".format(row))
-        tokens = parsing.to_tokens(row)
-        tokens, inferred_tree = parsing.get_fit(self, tokens)
+        with session().log("Process: {}".format(row)):
+            fit = list(self.layers.process(self, row))
+            if len(fit) == 0:
+                return None
+
+            tokens, inferred_tree = fit[0]
            result = knowledge_evaluation.integrate_information(self.knowledge,
                                                                {
                                                                    "elements": tokens,
                                                                    "parsed": inferred_tree,
                                                                })
            self.act_upon(result)
+            session().annotate("Result: {}".format(result))

            knowledge_after = copy.deepcopy(self.knowledge)
            knowledge_diff_getter = lambda: diff_knowledge(knowledge_before,
--- a/naive-nlu/tree_nlu/knowledge_evaluation.py
+++ b/naive-nlu/tree_nlu/knowledge_evaluation.py
@ -1,3 +1,5 @@
+from .session.org_mode import global_session as session
+
 from .modifiable_property import (
    ModifiableProperty,
    ModifiablePropertyWithAst,
@ -9,6 +11,7 @@ def resolve(knowledge_base, elements, value):
    if isinstance(value, int):
        return elements[value]
    elif isinstance(value, tuple) or isinstance(value, list):
+        session().annotate("V: {} {}".format(value, elements))
        return integrate_information(knowledge_base, {
            "elements": elements,
            "parsed": value,
@ -41,16 +44,42 @@ def get_subquery_type(knowledge_base, atom):


 def property_for_value(knowledge_base, value):
+    if value in knowledge_base:
+        # Annotate the property as property
+        groups = knowledge_base[value].get('groups', {'property'})
+        groups.add('property')
+        knowledge_base[value]['groups'] = groups
+
+        # And find the property "name"
+        if 'as_property' in knowledge_base[value]:
            return knowledge_base[value]['as_property']

+        return knowledge_base[value].get('groups', {'property'})
+    else:
+        # Consider that any property is... a property
+        knowledge_base[value] = {'groups': {'property'}}
+        return {'property'}
+

 def modifiable_property_from_property(prop, path, value):
    def getter():
        nonlocal prop, path, value
+        if isinstance(path, set):
+            # If the property is from a set, it's true if any possible
+            # path has an element as true
+            return any(map(lambda possible_path: ((possible_path in prop)
+                                                  and
+                                                  (prop[possible_path] == value)),
+                           path))
+        else:
            return (path in prop) and prop[path] == value

    def setter():
        nonlocal prop, path, value
+        if isinstance(path, set):
+            for possible_path in path:
+                prop[possible_path] = value
+        else:
            prop[path] = value

    return ModifiableProperty(
@ -74,12 +103,31 @@ def exists_property_with_value(knowledge_base, elements, subj, value):


 def modifiable_element_for_existance_in_set(container, set_name, element):
+    session().annotate("-----({} {} {})".format(container, set_name, element))
+
    def getter():
        nonlocal container, set_name, element
+        session().annotate("  get({} {} {})".format(container, set_name, element))
        return (set_name in container) and (element in container[set_name])

    def setter():
        nonlocal container, set_name, element
+        session().annotate("  add({} {} {})".format(container, set_name, element))
+        return container[set_name].add(element)
+
+    return ModifiableProperty(
+        getter=getter,
+        setter=setter,
+    )
+
+def modifiable_element_for_existance_in_group(container, element, backlink, set_name='groups'):
+    def getter():
+        nonlocal container, element, backlink, set_name
+        return (set_name in container) and (element in container[set_name])
+
+    def setter():
+        nonlocal container, set_name, element
+        backlink['groups'].add(set_name)
        return container[set_name].add(element)

    return ModifiableProperty(
@ -92,17 +140,22 @@ def pertenence_to_group(knowledge_base, elements, subj, group):
    group = resolve(knowledge_base, elements, group)

    if subj not in knowledge_base:
-        knowledge_base[subj] = {}
+        knowledge_base[subj] = {'groups': set()}

    if "groups" not in knowledge_base[subj]:
        knowledge_base[subj]["groups"] = set()

-    return modifiable_element_for_existance_in_set(
-        container=knowledge_base[subj],
-        set_name="groups",
-        element=group
-    )
+    if group not in knowledge_base:
+        knowledge_base[group] = {'groups': set()}

+    if "groups" not in knowledge_base[group]:
+        knowledge_base[group]["groups"] = set()
+
+    return modifiable_element_for_existance_in_group(
+        container=knowledge_base[subj],
+        element=group,
+        backlink=knowledge_base[group],
+    )

 def has_capacity(knowledge_base, elements, subj, capacity):
    subj = resolve(knowledge_base, elements, subj)
@ -128,12 +181,70 @@ def question(knowledge_base, elements, subj):
        return subj.getter()
    return subj

+def implies(knowledge_base, elements, precedent, consequent):
+    '''
+    Build the modifiable property "`precedent` implies `consequent`".
+
+    Both arguments are resolved against `elements` first. The entry for the
+    precedent (and its "implications" set) is created in `knowledge_base`
+    when missing; the consequent itself is only added by the returned
+    property's setter.
+    '''
+    precedent = resolve(knowledge_base, elements, precedent)
+    consequent = resolve(knowledge_base, elements, consequent)
+
+    precedent_info = knowledge_base.setdefault(precedent, {'groups': set()})
+    precedent_info.setdefault("implications", set())
+
+    return modifiable_element_for_existance_in_set(
+        container=precedent_info,
+        set_name="implications",
+        element=consequent
+    )
+
+
+def property_has_value(knowledge_base, elements, subj, prop, value):
+    '''
+    Build the modifiable property "`subj` has `value` among its `prop`".
+
+    The three arguments are resolved against `elements` first. The entry
+    for the subject and the set stored under `prop` are created in
+    `knowledge_base` when missing; the value itself is only added by the
+    returned property's setter.
+    '''
+    subj = resolve(knowledge_base, elements, subj)
+    prop = resolve(knowledge_base, elements, prop)
+    value = resolve(knowledge_base, elements, value)
+
+    subject_info = knowledge_base.setdefault(subj, {'groups': set()})
+    subject_info.setdefault(prop, set())
+
+    return modifiable_element_for_existance_in_set(
+        container=subject_info,
+        set_name=prop,
+        element=value
+    )
+
+def perform_verb_over_object(knowledge_base, elements, subj, verb, obj):
+    '''
+    Build the modifiable property "`subj` performs `verb` over `obj`".
+
+    The three arguments are resolved against `elements` first. The data is
+    kept under knowledge_base[subj]['performs-over'][verb], a set of
+    objects; missing levels are created here, while the object itself is
+    only added by the returned property's setter.
+    '''
+    subj = resolve(knowledge_base, elements, subj)
+    verb = resolve(knowledge_base, elements, verb)
+    obj = resolve(knowledge_base, elements, obj)
+    session().annotate("({} {} {})".format(verb, subj, obj))
+
+    subject_info = knowledge_base.setdefault(subj, {'groups': set()})
+    performed = subject_info.setdefault('performs-over', {})
+    performed.setdefault(verb, set())
+
+    return modifiable_element_for_existance_in_set(
+        container=performed,
+        set_name=verb,
+        element=obj
+    )
+

 knowledge_ingestion = {
    "exists-property-with-value": exists_property_with_value,
    "pertenence-to-group": pertenence_to_group,
    "has-capacity": has_capacity,
    "question": question,
+    "implies": implies,
+    "property-has-value": property_has_value,
+    "perform-verb-over-object": perform_verb_over_object,
 }


@ -152,6 +263,29 @@ def integrate_information(knowledge_base, example):
    args = ast[1:]
    elements = example.get('elements', None)

+    session().annotate("Integrating:")
+    session().annotate("AST: {}".format(ast))
+    session().annotate("ARG: {}".format(elements))
+    session().annotate("------------")
+
    return tagged_with_ast(
        ast, elements,
        knowledge_ingestion[method](knowledge_base, elements, *args))
+
+def can_be_used_in_place(knowledge, token, minisegment):
+    '''
+    Decide whether `token` may fill the slot described by `minisegment`.
+
+    A token that is unknown to `knowledge.knowledge` fits anywhere. A known
+    token fits when it shares at least one group with the minisegment, or
+    when neither of them has any group.
+    '''
+    known = knowledge.knowledge
+    if token not in known:
+        return True
+
+    token_groups = known[token].get('groups', set())
+    segment_groups = minisegment.get('groups', set())
+
+    shares_group = bool(token_groups & segment_groups)
+    both_ungrouped = not token_groups and not segment_groups
+    return shares_group or both_ungrouped
--- a/naive-nlu/tree_nlu/layered_model.py
+++ b/naive-nlu/tree_nlu/layered_model.py
@ -0,0 +1,49 @@
+from .layers import tokenization_layer
+from .layers import parsing_layer
+from .layers import parsing
+from .session.org_mode import global_session as session
+
+
+def make_yield_pipe(layers, knowledge_base, example, func):
+    '''
+    Chain `layers` into a generator pipeline, first layer first.
+
+    With no layers the example itself is the only thing yielded. Otherwise
+    every value produced by the pipeline of all layers but the last is
+    passed, together with the last layer, to `func(layer, value)`, and
+    everything `func` returns is yielded.
+    '''
+    if len(layers) < 1:
+        yield example
+        return
+
+    depth = len(layers)
+    last_layer = layers[-1]
+    upstream = make_yield_pipe(layers[:-1], knowledge_base, example, func)
+    for upstream_item in upstream:
+        session().annotate("[{}] --> {}".format(depth, upstream_item))
+        # The layer's output is collected completely before anything is
+        # yielded downstream.
+        produced = list(func(last_layer, upstream_item))
+        for item in produced:
+            yield item
+
+
+class BaseModel:
+    '''
+    Layered language model: a tokenization layer followed by a parsing layer.
+    '''
+
+    def __init__(self, knowledge_base):
+        self.tokenization = tokenization_layer.TokenizationLayer(knowledge_base)
+        self.parsing = parsing_layer.ParsingLayer()
+
+        # Layers in the order the pipeline runs them.
+        self.layers = [self.tokenization, self.parsing]
+
+    def reprocess(self, examples):
+        '''
+        Re-derive the matcher of every example using all the other examples.
+
+        Each example is a (matcher, tree) pair. When parsing.get_matching()
+        finds a non-empty combined matcher for it, that matcher replaces the
+        original one; the tree is always kept. Returns the new list.
+        '''
+        reprocessed = []
+        for index in range(len(examples)):
+            sample = examples[index]
+            rest = examples[:index] + examples[index + 1:]
+            combined = parsing.get_matching(sample, rest)
+            if len(combined) > 0:
+                sample = (combined, sample[1])
+            reprocessed.append(sample)
+
+        return reprocessed
+
+    def integrate(self, knowledge_base, example):
+        '''Run `example` through the `integrate` step of every layer.'''
+        def run_layer(layer, layer_input):
+            return layer.integrate(knowledge_base, layer_input)
+
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, run_layer)
+
+    def process(self, knowledge_base, example):
+        '''Run `example` through the `process` step of every layer.'''
+        def run_layer(layer, layer_input):
+            return layer.process(knowledge_base, layer_input)
+
+        yield from make_yield_pipe(self.layers, knowledge_base,
+                                   example, run_layer)
+
+    def tokenize(self, row, return_one=True):
+        '''
+        Tokenize `row` with the tokenization layer's to_tokens().
+
+        `return_one` is accepted but not forwarded to the layer.
+        '''
+        return self.tokenization.to_tokens(row)
--- a/naive-nlu/tree_nlu/layers/parsing.py
+++ b/naive-nlu/tree_nlu/layers/parsing.py
@ -0,0 +1,500 @@
+#!/usr/bin/env python
+
+from ..session.org_mode import global_session as session
+import re
+import copy
+
+from functools import reduce
+from typing import List, Dict
+from ..modifiable_property import ModifiableProperty
+from .. import parameters
+from ..atoms import Atom, a, is_atom
+from .. import knowledge_evaluation
+
+def make_template(knowledge_base, tokens, parsed):
+    '''
+    Turn a token list and its parsed tree into a (matcher, template) pair.
+
+    Every token that also appears in `parsed` is replaced, in the template,
+    by the token's position (only the first occurrence in the template is
+    replaced) and, in the matcher, by a {'groups': ...} dictionary holding
+    a copy of the groups known for that token.
+
+    Returns (tokens, matcher, template); `tokens` is returned untouched.
+    '''
+    matcher = list(tokens)
+    template = list(parsed)
+    session().annotate(" -- MK TEMPLATE --")
+    session().annotate("MATCHR: {}".format(matcher))
+    session().annotate("TEMPLT: {}".format(template))
+
+    for position, word in enumerate(matcher):
+        if word not in template:
+            continue
+
+        template[template.index(word)] = position
+        known_groups = knowledge_base.knowledge.get(word, {}).get('groups', set())
+        matcher[position] = {
+            'groups': set(known_groups),
+        }
+    return tokens, matcher, template
+
+
+def is_bottom_level(tree):
+    '''Return True when `tree` directly contains no list and no tuple.'''
+    return not any(isinstance(element, (list, tuple)) for element in tree)
+
+
+def get_lower_levels(parsed):
+    '''
+    List the innermost subtrees of `parsed`.
+
+    Returns (path, subtree) pairs, where `path` is the tuple of indexes
+    leading from the root to a subtree that contains no further list or
+    tuple. The root itself is never reported, so a flat tree gives [].
+    '''
+    found = []
+
+    def visit(node, route):
+        has_children = False
+        for index, child in enumerate(node):
+            if isinstance(child, (list, tuple)):
+                visit(child, route + (index,))
+                has_children = True
+
+        # An empty route means `node` is the root, which is skipped.
+        if route and not has_children:
+            found.append((route, node))
+
+    visit(parsed, ())
+    return found
+
+
+# TODO: probably optimize this, it creates lots of unnecessary tuples
+def replace_position(tree, position, new_element):
+    '''
+    Return a copy of `tree` with the element at `position` replaced.
+
+    `position` is a sequence of indexes leading from the root to the
+    element, as produced by get_lower_levels(). An empty position replaces
+    the whole tree. Every level that is rebuilt along the route comes back
+    as a tuple; the input tree is not modified.
+    '''
+    session().annotate("REPLACE POSITIONS:")
+    session().annotate("  TREE  : {}".format(tree))
+    session().annotate("POSITION: {}".format(position))
+    session().annotate("NEW ELEM: {}".format(new_element))
+    session().annotate("------------------")
+
+    def aux(current_tree, remaining_route):
+        if len(remaining_route) == 0:
+            return new_element
+
+        step = remaining_route[0]
+        # Rebuild *this* level (not the root) around the replaced child and
+        # keep every sibling that follows it: the tail starts at step + 1.
+        return (
+            tuple(current_tree[:step])
+            + (aux(current_tree[step], remaining_route[1:]),)
+            + tuple(current_tree[step + 1:])
+        )
+
+    result = aux(tree, position)
+    session().annotate("-RESULT: {}".format(result))
+    return result
+
+
+def integrate_language(knowledge_base, example):
+    '''
+    Learn how the tokens of an example map onto its parsed tree.
+
+    Generator that yields a single (tokens, matcher, result) triple.
+
+    While the tree in example["parsed"] still has nested subtrees, each
+    innermost subtree (see get_lower_levels) is matched against the
+    trained examples; the tokens it covers are collapsed into one
+    `(subquery_type, remix)` token and the subtree is replaced, in the
+    tree, by the position of that token. The collapsed tokens and tree are
+    finally turned into a template with make_template().
+
+    Reads example["parsed"] and example["tokens"].
+
+    Raises Exception('Similar not found') when no trained example produces
+    a remix for one of the subtrees.
+    '''
+    parsed = example["parsed"]
+
+    tokens = example['tokens']
+    resolved_parsed = copy.deepcopy(parsed)
+
+    while True:
+        session().annotate("P: {}".format(resolved_parsed))
+        lower_levels = get_lower_levels(resolved_parsed)
+        session().annotate("Lower: {}".format(lower_levels))
+        if len(lower_levels) == 0:
+            break
+
+        for position, atom in lower_levels:
+            with session().log("Atom {}".format(atom)):
+                result = None
+                similars = get_similar_tree(knowledge_base, atom, tokens)
+                for similar in similars:
+                    result = build_remix_matrix(knowledge_base, tokens, atom, similar)
+                    if result is not None:
+                        break
+                else:
+                    # Reached when the loop ends without a break, which
+                    # includes the case of no similar examples at all.
+                    raise Exception('Similar not found')
+
+                remix, (start_bounds, end_bounds) = result
+
+                # Tokens between the bounds. The end index is computed
+                # explicitly: a `-len(end_bounds)` slice would be `-0`, and
+                # thus empty, when there are no end bounds.
+                inner_tokens = tokens[len(start_bounds):len(tokens) - len(end_bounds)]
+                after_remix = apply_remix(inner_tokens, remix)
+                session().annotate("--FIND MIX--")
+                session().annotate("-MIX- | {}".format(remix))
+                session().annotate("-FRM- | {}".format(tokens))
+                session().annotate("-AFT- | {}".format(after_remix))
+
+                session().annotate("--- TEMPLATE ---")
+
+                _, matcher, result = make_template(knowledge_base, after_remix, atom)
+                session().annotate("Tx: {}".format(after_remix))
+                session().annotate("Mx: {}".format(matcher))
+                session().annotate("Rx: {}".format(result))
+                session().annotate("Sx: {}".format(start_bounds))
+                session().annotate("Ex: {}".format(end_bounds))
+
+
+                assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
+                session().annotate( "  +-> {}".format(after_remix))
+                subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
+                session().annotate(r"  \-> <{}>".format(subquery_type))
+
+                # Clean remaining tokens
+                new_tokens = list(tokens)
+                offset = len(start_bounds)
+                for _ in range(len(remix)):
+                    new_tokens.pop(offset)
+
+                # TODO: Get specific types for... types
+                new_tokens.insert(offset, (subquery_type, remix))
+                tokens = new_tokens
+
+                resolved_parsed = replace_position(resolved_parsed, position, offset)
+                session().annotate("RP: {}".format(resolved_parsed))
+                session().annotate("AT: {}".format(atom))
+                session().annotate("#########")
+
+
+    tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
+    session().annotate("T: {}".format(tokens))
+    session().annotate("M: {}".format(matcher))
+    session().annotate("R: {}".format(result))
+    session().annotate("---")
+    yield tokens, matcher, result
+
+
+def apply_remix(tokens, remix):
+    '''
+    Rebuild a token list following the recipe in `remix`.
+
+    Integer entries of `remix` pick the token at that index, string
+    entries are copied literally. Returns None as soon as an index lies
+    past the end of `tokens`.
+    '''
+    output = []
+    for piece in remix:
+        if not isinstance(piece, int):
+            assert(isinstance(piece, str))
+            output.append(piece)
+            continue
+
+        if piece >= len(tokens):
+            return None
+        output.append(tokens[piece])
+    return output
+
+
+def build_remix_matrix(knowledge_base, tokens, atom, similar):
+    '''
+    Find how `tokens` can be rearranged to look like a similar example.
+
+    `similar` is one of the 5-tuples returned by get_similar_tree(); only
+    its first element, the similar example's matcher, is used.
+
+    Returns (remix, (start_bounds, end_bounds)), or None when no remix is
+    possible. The bounds are the (index, element) pairs that find_bounds()
+    reports for the start and the end of the tokens.
+    '''
+    tokens = list(tokens)
+    with session().log("Remix matrix for {} - {}".format(tokens, atom)):
+        tokens, matcher, _template = make_template(knowledge_base, tokens, atom)
+        similar_matcher, _, _, _, _ = similar
+
+        start_bounds, end_bounds = find_bounds(knowledge_base, matcher, similar_matcher)
+
+        # Drop the bounds from both lists, end bounds first and then the
+        # start bounds from the last one backwards.
+        # NOTE(review): if a position ended up in both bound lists it would
+        # be popped twice -- confirm that cannot happen.
+        to_drop = end_bounds + start_bounds[::-1]
+        for index, _element in to_drop:
+            matcher.pop(index)
+            tokens.pop(index)
+
+        possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
+        session().annotate("Possible remixes: {}".format(possible_remixes))
+        if len(possible_remixes) < 1:
+            return None
+
+        return possible_remixes[0], (start_bounds, end_bounds)
+
+
+def get_possible_remixes(knowledge_base, matcher, similar_matcher):
+    '''
+    List the remixes that map `matcher` onto `similar_matcher`.
+
+    For every matcher element the candidate positions in `similar_matcher`
+    are collected: group matches for dictionaries, equal elements for
+    anything else that appears there, and the element itself when it does
+    not appear. The n-th remix is made of the n-th candidate of every
+    element, so there are as many remixes as the shortest candidate list.
+    '''
+    candidates_per_element = []
+    with session().log("Possible remixes from matcher: {}".format(matcher)):
+        for element in matcher:
+            with session().log("Element `{}`".format(element)):
+                session().annotate("Similar `{}`".format(similar_matcher))
+                if isinstance(element, dict):
+                    candidates = all_matching_indexes(knowledge_base, similar_matcher, element)
+                    session().annotate("Dict element matching: {}".format(candidates))
+                elif element in similar_matcher:
+                    candidates = all_indexes(similar_matcher, element)
+                    session().annotate("* element matching: {}".format(candidates))
+                else:
+                    session().annotate("`else` element matching: [element]")
+                    candidates = [element]
+                candidates_per_element.append(candidates)
+
+        # TODO: do some scoring to find the most "interesting combination"
+        return [list(combination) for combination in zip(*candidates_per_element)]
+
+
+def all_indexes(collection, element):
+    '''Return, in ascending order, every index of `collection` holding `element`.'''
+    return [
+        index
+        for index, candidate in enumerate(collection)
+        if candidate is element or candidate == element
+    ]
+
+
+def all_matching_indexes(knowledge_base, collection, element):
+    '''
+    Find the positions of `collection` whose groups are compatible with `element`.
+
+    `element` must be a dictionary with a "groups" key. An entry of
+    `collection` matches when it shares at least one group with the
+    element, or when both have no groups at all. The groups of an entry
+    come from the entry itself when it is a dictionary, or from the
+    knowledge base when the entry is a known token; known tokens without
+    groups are skipped.
+
+    Returns the matching indexes, the ones sharing more groups first.
+    '''
+    scored = []
+
+    with session().log('Matching “{}”'.format(element)):
+        assert("groups" in element)
+        wanted_groups = element["groups"]
+        for i, instance in enumerate(collection):
+            session().log('Checking “{}”'.format(instance))
+
+            if isinstance(instance, dict):
+                candidate_groups = instance["groups"]
+            elif instance in knowledge_base.knowledge:
+                session().log('Knowledge about “{}”: ”{}”'.format(instance, knowledge_base.knowledge[instance]))
+                info = knowledge_base.knowledge[instance]
+
+                if "groups" not in info:
+                    # This means that it is only known as a token,
+                    # so we should try to avoid using it
+                    continue
+
+                candidate_groups = info["groups"]
+            else:
+                # Unknown entries are used as they are.
+                candidate_groups = instance
+
+            shared = set(candidate_groups) & set(wanted_groups)
+            if shared or (0 == len(candidate_groups) == len(wanted_groups)):
+                scored.append((i, shared))
+
+        scored.sort(key=lambda pair: len(pair[1]), reverse=True)
+        return [index for index, _shared in scored]
+
+
+def element_matches_groups(knowledge, element: Dict, groups):
+    '''
+    Tell whether `element` shares at least one group with `groups`.
+
+    `element` is a matcher dictionary with a "groups" set. `groups` is the
+    thing it is compared against:
+      - a string known to `knowledge`: the groups stored for that string
+        are used;
+      - a dictionary: its own "groups" entry is used (none when missing).
+    Anything else never matches.
+    '''
+    with session().log("Checking if e “{}” matches groups “{}”".format(element, groups)):
+        if isinstance(groups, str) and groups in knowledge:
+            return len(knowledge[groups].get("groups", set()) & element['groups']) > 0
+        elif isinstance(groups, dict):
+            # Compare against the groups of `groups`; intersecting the
+            # element's groups with themselves would accept any dictionary.
+            return len(groups.get("groups", set()) & element['groups']) > 0
+        return False
+
+
+def find_bounds(knowledge, matcher, similar_matcher):
+    '''
+    Find the elements at both ends of `matcher` that `similar_matcher` lacks.
+
+    Returns (start_bounds, end_bounds), two lists of (index, element)
+    pairs. The start bounds run from the beginning of `matcher` up to the
+    first element contained in `similar_matcher`. The end bounds run from
+    the end backwards (so their indexes are descending) up to the first
+    element found there: strings are looked up directly, dictionaries are
+    compared by groups, anything else is never considered found.
+    '''
+    start_bounds = []
+    for index, element in enumerate(matcher):
+        if element in similar_matcher:
+            break
+        start_bounds.append((index, element))
+
+    def appears_in_similar(element):
+        if isinstance(element, str):
+            return element in similar_matcher
+        if isinstance(element, dict):
+            return any(
+                element_matches_groups(knowledge.knowledge, element, groups)
+                for groups in similar_matcher
+            )
+        return False
+
+    end_bounds = []
+    for index in range(len(matcher) - 1, -1, -1):
+        element = matcher[index]
+        if appears_in_similar(element):
+            break
+        end_bounds.append((index, element))
+
+    return start_bounds, end_bounds
+
+
+def get_similar_tree(knowledge_base, atom, tokens):
+    '''
+    Rank the trained examples that could serve as a model for `atom`.
+
+    Only trained (matcher, tree) pairs whose tree is bottom level and
+    starts with the same element as `atom` are considered.
+
+    Returns a list of (matcher, tree, resolved, atom_score, token_score)
+    tuples sorted by `atom_score * 100 + token_score`, best first. The
+    list is empty when nothing qualifies.
+    '''
+    possibilities = []
+
+    # Find matching possibilities
+    for entry, tree in knowledge_base.trained:
+        if not is_bottom_level(tree):
+            continue
+        if tree[0] == atom[0]:
+            possibilities.append((entry, tree))
+
+    # Sort by more matching elements
+    sorted_possibilities = []
+    for (raw, possibility) in possibilities:
+        resolved = []
+        for element in atom:
+            if isinstance(element, str):
+                resolved.append(element)
+            else:
+                # NOTE(review): the hunk header for knowledge_evaluation.py
+                # shows `resolve(knowledge_base, elements, value)`; here
+                # `element` goes in as `elements` and `raw` as `value`.
+                # Confirm this order is intended.
+                resolved.append(knowledge_evaluation.resolve(
+                    knowledge_base.knowledge,
+                    element,
+                    raw))
+
+        # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element
+        # Number of positions where the resolved atom equals the atom itself.
+        # NOTE(review): string elements are copied into `resolved` unchanged,
+        # so they always count -- confirm that is the intended score.
+        atom_score = sum([resolved[i] == atom[i]
+                     for i
+                     in range(min(len(resolved),
+                                  len(atom)))])
+        # Number of elements of the example's matcher present in `tokens`.
+        token_score = sum([similar_token in tokens
+                           for similar_token
+                           in raw])
+
+        sorted_possibilities.append((raw, possibility, resolved, atom_score, token_score))
+
+    sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3] * 100 + p[4], reverse=True)
+    if len(sorted_possibilities) < 1:
+        return []
+
+    # Record the ranking in the session log; the result is not altered.
+    for i, possibility in enumerate(sorted_possibilities):
+        similar_matcher, similar_result, similar_result_resolved, _atom_score, _token_score = possibility
+        with session().log("Like {}".format(similar_matcher)):
+            session().annotate('AST: {}'.format(similar_result))
+            session().annotate('Results on: {}'.format(similar_result_resolved))
+            session().annotate('Atom score: {}'.format(_atom_score))
+            session().annotate('Token score: {}'.format(_token_score))
+
+    return sorted_possibilities
+
+
+# TODO: unroll this mess
+def get_matching(sample, other):
+    '''
+    Combine the matcher of `sample` with the compatible matchers in `other`.
+
+    `sample` and every entry of `other` are pairs whose first item is a
+    matcher. An entry of `other` is compatible when its matcher has the
+    same length and, position by position, holds a dictionary sharing some
+    group where the sample has a dictionary, and a tuple with the same
+    first item where the sample has a tuple.
+
+    Returns the sample's matcher with the groups of every dictionary
+    narrowed to the ones shared with all the compatible entries. Returns
+    [] when no candidate is left before a position is examined.
+    '''
+    sample_matcher = sample[0]
+    size = len(sample_matcher)
+    candidates = [entry for entry in other if len(entry[0]) == size]
+
+    for index in range(size):
+        if len(candidates) == 0:
+            return []
+
+        reference = sample_matcher[index]
+        if isinstance(reference, dict):  # Dictionaries are compared by groups
+            candidates = [
+                entry for entry in candidates
+                if isinstance(entry[0][index], dict)
+                and len(entry[0][index]['groups'] & reference['groups']) > 0
+            ]
+        elif isinstance(reference, tuple):  # Tuples are compared by types [0]
+            candidates = [
+                entry for entry in candidates
+                if isinstance(entry[0][index], tuple)
+                and entry[0][index][0] == reference[0]
+            ]
+
+    # Generate the combination of this and other(s) matcher
+    combined = []
+    for index in range(size):
+        reference = sample_matcher[index]
+        if isinstance(reference, (str, tuple)):
+            combined.append(reference)
+            continue
+
+        shared_groups = reference['groups']
+        if len(candidates) > 0:
+            for entry in candidates:
+                shared_groups = shared_groups & entry[0][index]['groups']
+
+        combined.append({'groups': shared_groups})
+    return combined
+
+
+def reverse_remix(tree_section, remix):
+    '''
+    Pick elements of `tree_section` in the order dictated by `remix`.
+
+    Every integer in `remix` selects a (deep-copied) element of
+    `tree_section`; the index is shifted by one for each string entry seen
+    before it. The elements of `tree_section` from position len(remix) on
+    are appended unchanged. Returns None when a shifted index falls past
+    the end of `tree_section`.
+    '''
+    picked = []
+    shift = 0
+    for origin in remix:
+        if not isinstance(origin, int):
+            assert(isinstance(origin, str))
+            shift += 1
+            continue
+
+        source = origin + shift
+        if source >= len(tree_section):
+            return None
+        picked.append(copy.deepcopy(tree_section[source]))
+
+    return picked + tree_section[len(remix):]
+
+
+def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
+    '''
+    Fit `tokens` against the trained examples.
+
+    Every trained (matcher, ast) pair is tried with match_fit(). Returns
+    the first fit that was found, or None when there is none.
+    '''
+    fits = []
+    for matcher, ast in knowledge.trained:
+        with session().log("{} <- {}".format(matcher, tokens)):
+            candidate = match_fit(knowledge, tokens, matcher, ast,
+                                  remaining_recursions)
+            if candidate is None:
+                continue
+
+            with session().log("Result: {}".format(candidate)):
+                fits.append(candidate)
+
+    return fits[0] if len(fits) > 0 else None
+
+
+def is_definite_minisegment(minisegment):
+    '''Tell whether a matcher element is a string or a dictionary.'''
+    return isinstance(minisegment, (str, dict))
+
+
+def match_token(knowledge, next_token, minisegment):
+    '''
+    Check if `next_token` satisfies a definite matcher element.
+
+    Strings have to be equal to the token, dictionaries are checked with
+    `knowledge_evaluation.can_be_used_in_place`. Any other kind of element
+    never matches.
+    '''
+    if isinstance(minisegment, str):
+        # TODO: check if the two elements can be used in each other place
+        return next_token == minisegment
+
+    if isinstance(minisegment, dict):
+        return knowledge_evaluation.can_be_used_in_place(knowledge, next_token, minisegment)
+
+    return False
+
+
+def resolve_fit(knowledge, fit, remaining_recursions):
+    '''
+    Resolve the elements of a fit that still hold unparsed tokens.
+
+    Definite elements (strings and dictionaries) are kept as they are. Any
+    other element is unpacked as `((result_type, remixer), tokens)`: its
+    tokens are reordered with `reverse_remix`, fitted again with `get_fit`
+    and the result is resolved through `knowledge_evaluation.resolve`.
+
+    Returns the list of resolved elements, or None as soon as one of the
+    elements cannot be reordered or fitted.
+    '''
+    resolved = []
+    for piece in fit:
+        if is_definite_minisegment(piece):
+            resolved.append(piece)
+            continue
+
+        with session().log("Resolving fit of `{}`".format(piece)):
+            (_result_type, remixer), subtokens = piece
+            reordered = reverse_remix(subtokens, remixer)
+            if reordered is None:
+                return None
+
+            subfit = get_fit(knowledge, reordered, remaining_recursions - 1)
+            if subfit is None:
+                return None
+
+            subfit_tokens, subfit_ast = subfit
+            session().annotate(" AST | {}".format(subfit_ast))
+            resolved.append(
+                knowledge_evaluation.resolve(knowledge.knowledge, subfit_tokens, subfit_ast))
+
+    return resolved
+
+
+def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
+    '''
+    Try to match `tokens` against a single trained `matcher`.
+
+    The matcher is walked element by element keeping a list of
+    (matched tokens, remaining tokens) possibilities. Definite elements
+    consume exactly one token when `match_token` accepts it; any other
+    element consumes every possible non-empty prefix of the remaining
+    tokens, to be resolved later by `resolve_fit`.
+
+    Returns a `(resolved_fit, ast)` tuple for the first possibility that
+    consumed all the tokens and could be resolved, or None if there is none.
+    '''
+    segment_possibilities = [([], tokens)]  # Matched tokens, remaining tokens
+    indent = ' ' * (parameters.MAX_RECURSIONS - remaining_recursions)
+    session().annotate(indent + 'T> {}'.format(tokens))
+    session().annotate(indent + 'M> {}'.format(matcher))
+    for minisegment in matcher:
+        with session().log("Minisegment `{}`".format(minisegment)):
+            possibilities_after_round = []
+            for matched_tokens, remaining_tokens in segment_possibilities:
+                # Possibilities that ran out of tokens cannot match this element
+                if len(remaining_tokens) < 1:
+                    continue
+
+                session().annotate(indent + "RT {}".format(remaining_tokens[0]))
+                session().annotate(indent + "DEF {}".format(is_definite_minisegment(minisegment)))
+                if is_definite_minisegment(minisegment):
+                    # Possibilities that do not match are just dropped
+                    if match_token(knowledge, remaining_tokens[0], minisegment):
+                        possibilities_after_round.append((
+                            matched_tokens + [remaining_tokens[0]],
+                            remaining_tokens[1:]
+                           ))
+                else:
+                    # TODO: optimize this with a look ahead
+                    # Splits past the end of the remaining tokens would only
+                    # repeat the "take everything" possibility, so they are
+                    # not generated. The previous upper bound (`len(tokens)`)
+                    # is kept as a limit.
+                    split_limit = min(len(tokens), len(remaining_tokens) + 1)
+                    for i in range(1, split_limit):
+                        possibilities_after_round.append((
+                            matched_tokens + [(minisegment, remaining_tokens[:i])],
+                            remaining_tokens[i:]
+                           ))
+                session().annotate(indent + "## PA {}".format(possibilities_after_round))
+
+            segment_possibilities = possibilities_after_round
+            for possibility in segment_possibilities:
+                with session().log("Possibility: `{}`".format(possibility)):
+                    pass
+            if len(segment_possibilities) < 1:
+                with session().log("NO POSSIBLE"):
+                    pass
+
+    # Only the possibilities that consumed every token are valid matches
+    fully_matched_segments = [(matched, remaining)
+                              for (matched, remaining)
+                              in segment_possibilities
+                              if len(remaining) == 0]
+
+    resolved_fits = []
+    with session().log("Full matches"):
+        for fit, _ in fully_matched_segments:
+            with session().log(fit):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+                pass
+
+    with session().log("Resolutions"):
+        for fit, _ in fully_matched_segments:
+            with session().log("Resolving {}".format(fit)):  # REMIXES HAVE TO BE APPLIED BEFORE!!!
+                resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
+                if resolved_fit is not None:
+                    resolved_fits.append(resolved_fit)
+                else:
+                    session().annotate("Not resolved")
+
+    if len(resolved_fits) == 0:
+        return None
+
+    return resolved_fits[0], ast
--- a/naive-nlu/tree_nlu/layers/parsing_layer.py
+++ b/naive-nlu/tree_nlu/layers/parsing_layer.py
@ -0,0 +1,16 @@
+from . import parsing
+
+class ParsingLayer:
+    '''Layer that exposes the functions of the `parsing` module.'''
+
+    def __init__(self):
+        # This layer keeps no state of its own
+        pass
+
+    def integrate(self, knowledge_base, example):
+        '''Yield whatever `parsing.integrate_language` produces for `example`.'''
+        yield from parsing.integrate_language(knowledge_base, example)
+
+    def train(self, knowledge_base, example):
+        '''Not supported on this layer, always fails an assertion.'''
+        assert False
+
+    def process(self, knowledge_base, input):
+        '''Yield the fit found by `parsing.get_fit`, or nothing if there is none.'''
+        fit = parsing.get_fit(knowledge_base, input)
+        if fit is None:
+            return
+
+        yield fit
--- a/naive-nlu/tree_nlu/layers/tokenization.py
+++ b/naive-nlu/tree_nlu/layers/tokenization.py
@ -0,0 +1,186 @@
+from ..session.org_mode import global_session as session
+from ..atoms import Atom, a, is_atom
+
+def lookahead_for_tokens_or_strucutral_elements(knowledge_base, remaining):
+    '''
+    Split `remaining` around the first known element found inside of it.
+
+    Structural elements are tried first and known tokens afterwards. The
+    result is a single-element list holding a `(before, element, after)`
+    tuple, or None when no candidate is found.
+    '''
+    def split_around_first(candidates, message):
+        # Annotate every lookup and stop on the first candidate found
+        for candidate in candidates:
+            position = remaining.find(candidate)
+            was_found = position >= 0
+            session().annotate(message.format(candidate, was_found))
+            if was_found:
+                before = remaining[:position]
+                after = remaining[position + len(candidate):]
+                return [(before, candidate, after)]
+        return None
+
+    split = split_around_first(knowledge_base.structural_elements,
+                               'Looking for structure with “{}”, found? {}')
+    if split is not None:
+        return split
+
+    return split_around_first(knowledge_base.knowledge.keys(),
+                              'Looking for token “{}”, found? {}')
+
+
+
+def to_tokens(knowledge_base, text, precedent=None):
+    '''
+    Generate the possible tokenizations of `text`.
+
+    The elements expected after `precedent` are asked to the knowledge
+    base and matched against the start of the text; the rest of the text
+    is tokenized recursively using the match as the new precedent.
+
+    Yields lists of tokens with the empty strings filtered out, except for
+    an empty `text`, which yields `['']`.
+
+    Raises an Exception if a match does not consume any text.
+    '''
+    if len(text) == 0:
+        session().annotate("No text remaining")
+        yield ['']
+        return
+
+    with session().log("Tokenizing {}".format(text)):
+        for option in knowledge_base.expected_token_after_precedent(precedent):
+            with session().log("Next: “{}”".format(option)):
+                with session().log("Matching “{}” on “{}”".format(option, text)):
+                    for token_match in tokenization_match(option, text, knowledge_base):
+                        if token_match is None:
+                            session().annotate("No match")
+                            # There is nothing to unpack, go for the next match
+                            continue
+
+                        match, remaining = token_match
+                        if len(remaining) == len(text):
+                            raise Exception('No text consumed in match')
+
+                        session().annotate('Match: “{}”'.format(match))
+                        with session().log('Remaining “{}”'.format(remaining)):
+                            for sublevel in to_tokens(knowledge_base, remaining, match):
+                                candidate = list(filter(lambda x: x != '', [match] + sublevel))
+                                session().annotate('Yielding candidate “{}”'.format(candidate))
+                                yield candidate
+
+
+def tokenization_match(element, text, knowledge_base):
+    '''
+    Yield the `(match, remaining text)` pairs of `element` at the start of `text`.
+
+    String elements are matched literally and 'token' atoms are delegated
+    to `match_single_token`. Any other element raises NotImplementedError.
+    '''
+    if isinstance(element, str):
+        # Constant/structural string matching
+        if text.startswith(element):
+            # This match comes from a structuring element
+            # It doesn't appear on the tokenization
+            # So we should return it as an empty string
+            yield ('', text[len(element):])
+        return
+
+    if not is_atom(element, 'token'):
+        raise NotImplementedError()
+
+    yield from match_single_token(text, knowledge_base)
+
+
+def match_single_token(text, knowledge_base):
+    '''
+    Yield the `(token, remaining text)` splits found at the start of `text`.
+
+    If any known token starts the text only those splits are produced.
+    Otherwise the token is inferred: first cutting before each structural
+    element found after position 0, then taking the whole text, and
+    finally cutting before each known token found inside the text.
+    '''
+    matched_at_start = False
+    for known_token in knowledge_base.knowledge.keys():
+        if text.startswith(known_token):
+            yield known_token, text[len(known_token):]
+            matched_at_start = True
+
+    if matched_at_start:
+        return
+
+    session().annotate('No token found at the start of ”{}”'.format(text))
+    session().annotate('using structural elements to infer it')
+    # TODO: review this when multiple structural elements are available
+    for separator in knowledge_base.structural_elements:
+        session().annotate('Looking for se “{}” in “{}”'.format(separator, text))
+        cut = text.find(separator, 0)
+        if cut > 0:  # 0 is not considered a valid position for this kind of split
+            session().annotate('Found ”{}”, inferring “{}”'.format(separator, text[:cut]))
+            yield text[:cut], text[cut:]
+
+    session().annotate('No structural element or token found, inferring only token remaining')
+    yield text, ''
+
+    # Using other tokens for cutoff
+    for known_token in knowledge_base.knowledge.keys():
+        session().annotate('Looking for token “{}” in “{}”'.format(known_token, text))
+        cut = text.find(known_token)
+        if cut >= 0:
+            session().annotate('Found ”{}”, in position ”{}”'.format(known_token, cut))
+            yield text[:cut], text[cut:]
+
+
+def integrate_tokenization(knowledge_base, example):
+    '''
+    Learn how the 'tokens' of an example map to its 'text'.
+
+    Returns the value of `integrate_token_to_text_matching` for them.
+    '''
+    return integrate_token_to_text_matching(
+        knowledge_base, example['text'], example['tokens'])
+
+
+def integrate_token_to_text_matching(knowledge_base, text, tokens):
+    '''
+    Learn which elements follow each other on a tokenized text.
+
+    Every token is located, in order, on the text fragments not consumed
+    yet and replaced by a 'token' atom. The resulting sequence of atoms and
+    leftover text is then taught pair by pair to the knowledge base.
+
+    Returns `tokens` unchanged. Raises an Exception if a token cannot be
+    found on the remaining text.
+    '''
+    fragments = [text]
+
+    # Convert to tokens
+    for token in tokens:
+        # Look for token in the fragments
+        for i, fragment in enumerate(fragments):
+            # Positions already consumed hold atoms, only text can be split
+            if not isinstance(fragment, str):
+                continue
+
+            if token in fragment:
+                before, after = fragment.split(token, maxsplit=1)
+                fragments = (fragments[:i] + [before]
+                             + [a('token')]
+                             + [after] + fragments[i + 1:])
+                break
+        else:
+            raise Exception('Token not found')
+
+    # Remove leftovers from splits
+    fragments = [fragment for fragment in fragments if fragment != '']
+    session().log("Tokenized as {} over {}".format(fragments, tokens))
+
+    for precedent, consequent in zip(fragments, fragments[1:]):
+        learn_token_pair(precedent, consequent, knowledge_base)
+
+    return tokens
+
+def learn_token_pair(precedent, consequent, knowledge_base):
+    '''Register on the knowledge base that `consequent` follows `precedent`.'''
+    register_pair = knowledge_base.add_token_pair
+    register_pair(precedent, consequent)
+
+
+def pick_one_tokenization(options, knowledge_base):
+    '''
+    Heuristic function to pick the most probable tokenization.
+
+    The candidates are scored, in order, by the three criteria defined
+    below and the choice is delegated to `pick_by_score`.
+    '''
+    def structural_elements_inside_splits(tokenization):
+        # Times a structural element is contained in one of the splits
+        return sum(
+            sum(se in split for se in knowledge_base.structural_elements)
+            for split in tokenization)
+
+    def unknown_tokens(tokenization):
+        # Splits that are neither known tokens nor structural elements
+        return len([token for token in tokenization
+                    if (token not in knowledge_base.knowledge.keys())
+                    and (token not in knowledge_base.structural_elements)])
+
+    def negated_split_count(tokenization):
+        # Number of splits, negated
+        return -len(tokenization)
+
+    options = list(options)
+    with session().log("Picking among: {} options".format(len(options))):
+        session().log("Options: \n{}".format('\n'.join(map(str, options))))
+        return pick_by_score(options,
+                             [
+                                 structural_elements_inside_splits,
+                                 unknown_tokens,
+                                 negated_split_count,
+                             ])
+
+def pick_by_score(options, heuristics):
+    '''
+    Narrow down `options` applying each heuristic in order.
+
+    Every heuristic scores the surviving options and only the ones with
+    the lowest score are kept for the next round. The first survivor of
+    the last round is returned.
+    '''
+    for heuristic in heuristics:
+        assert(len(options) > 0)
+        scored = [(heuristic(candidate), candidate) for candidate in options]
+        ranked = sorted(scored, key=lambda pair: pair[0])
+
+        lowest_score = ranked[0][0]
+        session().annotate(ranked)
+        options = [candidate
+                   for (score, candidate) in ranked
+                   if score <= lowest_score]
+
+    session().log("{} finalists: \n{}".format(len(options), '\n'.join(map(str, options))))
+    return options[0]
+
--- a/naive-nlu/tree_nlu/layers/tokenization_layer.py
+++ b/naive-nlu/tree_nlu/layers/tokenization_layer.py
@ -0,0 +1,90 @@
+from ..session.org_mode import global_session as session
+from ..atoms import Atom
+from . import tokenization
+import random
+import copy
+
+def randomized_weighted_list(elements):
+    '''Yield the distinct values of `elements` once each, in shuffled order.'''
+    pool = list(elements)
+    random.shuffle(pool)
+
+    # Repeated values are only produced the first time they show up
+    seen = set()
+    for item in pool:
+        if item not in seen:
+            yield item
+            seen.add(item)
+
+class TokenizationLayer:
+    '''
+    Layer that splits text into tokens.
+
+    It keeps the elements seen so far, which ones are structural, and
+    which elements have followed each other (`token_chains`).
+    '''
+
+    def __init__(self, knowledge_base):
+        self.knowledge_base = knowledge_base
+        self.knowledge = knowledge_base.knowledge
+        self.tokens = set()
+        self.token_chains = {}
+        self.structural_elements = set()
+
+    def integrate(self, knowledge_base, data):
+        '''Yield a shallow copy of `data` with its 'text' tokenized under 'tokens'.'''
+        assert knowledge_base is self.knowledge_base
+        assert 'text' in data
+
+        # Only the chosen tokenization is yielded, not every alternative
+        tokens = self.tokenize(data['text'])
+        tokenized = copy.copy(data)
+        tokenized['tokens'] = tokens
+        yield tokenized
+
+    def process(self, knowledge_base, row):
+        '''Yield the chosen tokenization of `row`.'''
+        yield self.tokenize(row)
+
+    def tokenize(self, row, return_one=True):
+        '''
+        Tokenize the lowercased `row`.
+
+        With `return_one` the tokenization picked by the heuristics is
+        used to train this layer and returned. Otherwise the list of all
+        the options is returned.
+        '''
+        row = row.lower()
+        with session().log("Tokenize: {}".format(row)):
+            options = list(tokenization.to_tokens(self, row))
+            session().log("Results:\n{}".format('\n'.join(map(str, options))))
+
+            if not return_one:
+                return options
+
+            chosen = tokenization.pick_one_tokenization(options, self)
+            session().log("Chosen: “{}”".format(chosen))
+            self.train({'text': row, 'tokens': chosen})
+            return chosen
+
+    ## Tokenization
+    def add_token_pair(self, precedent, consequent):
+        '''Record both elements and that `consequent` came after `precedent`.'''
+        self.add_token(precedent)
+        self.add_token(consequent)
+        self.token_chains.setdefault(precedent, []).append(consequent)
+
+    def add_token(self, token):
+        '''Record an element, anything that is not an Atom is a structural element.'''
+        self.tokens.add(token)
+        if isinstance(token, Atom) or token in self.structural_elements:
+            return
+
+        session().annotate('Found new structural element “{}”'.format(token))
+        self.structural_elements.add(token)
+
+    def expected_token_after_precedent(self, precedent=None):
+        '''Candidates to follow `precedent`, shuffled and without repetitions.'''
+        if precedent in self.token_chains:
+            return randomized_weighted_list(self.token_chains[precedent])
+
+        # If there's no known precedent, just return all tokens
+        return randomized_weighted_list(self.tokens)
+
+    def train(self, example):
+        '''Learn from a tokenized example and register its unknown tokens.'''
+        with session().log('Training tokenizer'):
+            session().annotate("Example: {}".format(example))
+            tokens = tokenization.integrate_tokenization(self, example)
+
+            # Integrate knowledge of concept
+            for token in tokens:
+                if token not in self.knowledge:
+                    self.knowledge[token] = {}
--- a/naive-nlu/tree_nlu/parsing.py
+++ b/naive-nlu/tree_nlu/parsing.py
@ -1,384 +0,0 @@
-#!/usr/bin/env python
-
-from . import knowledge_evaluation
-
-from . import depth_meter
-import logging
-import re
-import copy
-
-from functools import reduce
-from typing import List
-from .modifiable_property import ModifiableProperty
-from . import parameters
-
-# TODO: more flexible tokenization
-def to_tokens(text):
-    return re.findall(r'(\w+|[^\s])', text)
-
-
-def make_template(knowledge_base, tokens, parsed):
-    matcher = list(tokens)
-    template = list(parsed)
-    for i in range(len(matcher)):
-        word = matcher[i]
-        if word in template:
-            template[template.index(word)] = i
-            matcher[i] = {
-                'groups': set(knowledge_base.knowledge[word]['groups'])
-            }
-    return tokens, matcher, template
-
-
-def is_bottom_level(tree):
-    for element in tree:
-        if isinstance(element, list) or isinstance(element, tuple):
-            return False
-    return True
-
-
-def get_lower_levels(parsed):
-    lower = []
-    def aux(subtree, path):
-        nonlocal lower
-        deeper = len(path) == 0
-        for i, element in enumerate(subtree):
-            if isinstance(element, list) or isinstance(element, tuple):
-                aux(element, path + (i,))
-                deeper = True
-
-        if not deeper:
-            lower.append((path, subtree))
-
-    aux(parsed, path=())
-    return lower
-
-
-# TODO: probably optimize this, it creates lots of unnecessary tuples
-def replace_position(tree, position, new_element):
-
-    def aux(current_tree, remaining_route):
-        if len(remaining_route) == 0:
-            return new_element
-
-        else:
-            step = remaining_route[0]
-            return (
-                tree[:step]
-                + (aux(tree[step], remaining_route[1:]),)
-                + tree[step + 2:]
-            )
-
-    return aux(tree, position)
-
-
-def integrate_language(knowledge_base, example):
-    text = example["text"].lower()
-    parsed = example["parsed"]
-
-    resolved_parsed = copy.deepcopy(parsed)
-    tokens = to_tokens(text)
-
-    while True:
-        logging.debug("P: {}".format(resolved_parsed))
-        lower_levels = get_lower_levels(resolved_parsed)
-        logging.debug("Lower: {}".format(lower_levels))
-        if len(lower_levels) == 0:
-            break
-
-        for position, atom in lower_levels:
-            logging.debug("\x1b[1mSelecting\x1b[0m: {}".format(atom))
-            similar = get_similar_tree(knowledge_base, atom)
-            remix, (start_bounds, end_bounds) = build_remix_matrix(knowledge_base, tokens, atom, similar)
-            _, matcher, result = make_template(knowledge_base, tokens, atom)
-            logging.debug("Tx: {}".format(tokens))
-            logging.debug("Mx: {}".format(matcher))
-            logging.debug("Rx: {}".format(result))
-            logging.debug("Remix: {}".format(remix))
-
-            after_remix = apply_remix(tokens[len(start_bounds):-len(end_bounds)], remix)
-            assert(len(after_remix) + len(start_bounds) + len(end_bounds) == len(tokens))
-            logging.debug( "  +-> {}".format(after_remix))
-            subquery_type = knowledge_evaluation.get_subquery_type(knowledge_base.knowledge, atom)
-            logging.debug(r"  \-> <{}>".format(subquery_type))
-
-            # Clean remaining tokens
-            new_tokens = list(tokens)
-            offset = len(start_bounds)
-            for _ in range(len(remix)):
-                new_tokens.pop(offset)
-
-            # TODO: Get a specific types for... types
-            new_tokens.insert(offset, (subquery_type, remix))
-            tokens = new_tokens
-
-            resolved_parsed = replace_position(resolved_parsed, position, offset)
-            logging.debug("#########")
-
-
-    tokens, matcher, result = make_template(knowledge_base, tokens, resolved_parsed)
-    logging.debug("T: {}".format(tokens))
-    logging.debug("M: {}".format(matcher))
-    logging.debug("R: {}".format(result))
-    logging.debug("---")
-    return tokens, matcher, result
-
-
-def apply_remix(tokens, remix):
-    rebuilt = []
-    for i in remix:
-        rebuilt.append(tokens[i])
-    return rebuilt
-
-
-def build_remix_matrix(knowledge_base, tokens, atom, similar):
-    tokens = list(tokens)
-    tokens, matcher, result = make_template(knowledge_base, tokens, atom)
-    similar_matcher, similar_result, similar_result_resolved, _ = similar
-
-    start_bounds, end_bounds = find_bounds(matcher, similar_matcher)
-
-    for i, element in (end_bounds + start_bounds[::-1]):
-        matcher.pop(i)
-        tokens.pop(i)
-
-    possible_remixes = get_possible_remixes(knowledge_base, matcher, similar_matcher)
-    chosen_remix = possible_remixes[0]
-
-    return chosen_remix, (start_bounds, end_bounds)
-
-
-def get_possible_remixes(knowledge_base, matcher, similar_matcher):
-
-    matrix = []
-    for element in matcher:
-        logging.debug("- {}".format(element))
-        logging.debug("+ {}".format(similar_matcher))
-        assert(element in similar_matcher or isinstance(element, dict))
-
-        if isinstance(element, dict):
-            indexes = all_matching_indexes(knowledge_base, similar_matcher, element)
-        else:
-            indexes = all_indexes(similar_matcher, element)
-        matrix.append(indexes)
-
-    # TODO: do some scoring to find the most "interesting combination"
-    return [list(x) for x in list(zip(*matrix))]
-
-
-def all_indexes(collection, element):
-    indexes = []
-    base = 0
-
-    for _ in range(collection.count(element)):
-        i = collection.index(element, base)
-        base = i + 1
-        indexes.append(i)
-
-    return indexes
-
-
-def all_matching_indexes(knowledge_base, collection, element):
-    indexes = []
-
-    assert("groups" in element)
-    element = element["groups"]
-    for i, instance in enumerate(collection):
-        if isinstance(instance, dict):
-            instance = instance["groups"]
-        elif instance in knowledge_base.knowledge:
-            instance = knowledge_base.knowledge[instance]["groups"]
-
-        intersection = set(instance) & set(element)
-        if len(intersection) > 0:
-            indexes.append((i, intersection))
-
-    return [x[0] for x in sorted(indexes, key=lambda x: len(x[1]), reverse=True)]
-
-
-def find_bounds(matcher, similar_matcher):
-    start_bounds = []
-    for i, element in enumerate(matcher):
-        if element in similar_matcher:
-            break
-        else:
-            start_bounds.append((i, element))
-
-    end_bounds = []
-    for i, element in enumerate(matcher[::-1]):
-        if element in similar_matcher:
-            break
-        else:
-            end_bounds.append((len(matcher) - (i + 1), element))
-
-    return start_bounds, end_bounds
-
-
-def get_similar_tree(knowledge_base, atom):
-    possibilities = []
-
-    # Find matching possibilities
-    for entry, tree in knowledge_base.trained:
-        if not is_bottom_level(tree):
-            continue
-        if tree[0] == atom[0]:
-            possibilities.append((entry, tree))
-
-    # Sort by more matching elements
-    sorted_possibilities = []
-    for (raw, possibility) in possibilities:
-        resolved = []
-        for element in atom:
-            if isinstance(element, str):
-                resolved.append(element)
-            else:
-                resolved.append(knowledge_evaluation.resolve(
-                    knowledge_base.knowledge,
-                    element,
-                    raw))
-
-        # TODO: Probably should take into account the categories of the elements in the "intake" ([0]) element
-        score = sum([resolved[i] == atom[i]
-                     for i
-                     in range(min(len(resolved),
-                                  len(atom)))])
-        sorted_possibilities.append((raw, possibility, resolved, score))
-    sorted_possibilities = sorted(sorted_possibilities, key=lambda p: p[3], reverse=True)
-    if len(sorted_possibilities) < 1:
-        return None
-
-    return sorted_possibilities[0]
-
-
-# TODO: unroll this mess
-def get_matching(sample, other):
-    l = len(sample[0])
-    other = list(filter(lambda x: len(x[0]) == l, other))
-    for i in range(l):
-        if len(other) == 0:
-            return []
-
-        if isinstance(sample[0][i], dict): # Dictionaries are compared by groups
-            other = list(filter(lambda x: isinstance(x[0][i], dict) and
-                                len(x[0][i]['groups'] & sample[0][i]['groups']) > 0,
-                                other))
-
-        elif isinstance(sample[0][i], tuple): # Tuples are compared by types [0]
-            other = list(filter(lambda x: isinstance(x[0][i], tuple) and
-                                x[0][i][0] == sample[0][i][0],
-                                other))
-
-    return [sample[0][x] if isinstance(sample[0][x], str)
-            else
-            sample[0][x] if isinstance(sample[0][x], tuple)
-            else {'groups': sample[0][x]['groups'] & reduce(lambda a, b: a & b,
-                                                            map(lambda y: y[0][x]['groups'],
-                                                                other))}
-            for x
-            in range(l)]
-
-
-def reprocess_language_knowledge(knowledge_base, examples):
-    examples = knowledge_base.examples + examples
-
-    pattern_examples = []
-    for i, sample in enumerate(examples):
-        other = examples[:i] + examples[i + 1:]
-        match = get_matching(sample, other)
-        if len(match) > 0:
-            sample = (match, sample[1],)
-        pattern_examples.append(sample)
-
-    return pattern_examples
-
-
-def reverse_remix(tree_section, remix):
-    result_section = []
-    for origin in remix:
-        result_section.append(copy.deepcopy(tree_section[origin]))
-    return result_section + tree_section[len(remix):]
-
-
-def get_fit(knowledge, tokens, remaining_recursions=parameters.MAX_RECURSIONS):
-    for matcher, ast in knowledge.trained:
-        result = match_fit(knowledge, tokens, matcher, ast,
-                           remaining_recursions)
-        if result is not None:
-            return result
-
-    return None
-
-
-def is_definite_minisegment(minisegment):
-    return isinstance(minisegment, str) or isinstance(minisegment, dict)
-
-
-def match_token(knowledge, next_token, minisegment):
-    if isinstance(minisegment, dict):
-        # TODO: check if the dictionary matches the values
-        return True
-    elif isinstance(minisegment, str):
-        # TODO: check if the two elements can be used in each other place
-        return next_token == minisegment
-
-    return False
-
-
-def resolve_fit(knowledge, fit, remaining_recursions):
-    fitted = []
-    for element in fit:
-        if is_definite_minisegment(element):
-            fitted.append(element)
-        else:
-            ((result_type, remixer), tokens) = element
-            remixed_tokens = reverse_remix(tokens, remixer)
-            minifit = get_fit(knowledge, remixed_tokens, remaining_recursions - 1)
-            if minifit is None:
-                return None
-
-            minitokens, miniast = minifit
-            subproperty = knowledge_evaluation.resolve(knowledge.knowledge, minitokens, miniast)
-            fitted.append(subproperty)
-
-    return fitted
-
-
-def match_fit(knowledge, tokens, matcher, ast, remaining_recursions):
-    segment_possibilities = [([], tokens)]  # Matched tokens, remaining tokens
-    for minisegment in matcher:
-        possibilities_after_round = []
-        for matched_tokens, remaining_tokens in segment_possibilities:
-            if len(remaining_tokens) < 1:
-                continue
-
-            if is_definite_minisegment(minisegment):
-                if match_token(knowledge, remaining_tokens[0], minisegment):
-                    possibilities_after_round.append((
-                        matched_tokens + [remaining_tokens[0]],
-                        remaining_tokens[1:]
-                    ))
-            else:
-                # TODO: optimize this with a look ahead
-                for i in range(1, len(tokens)):
-                    possibilities_after_round.append((
-                        matched_tokens + [(minisegment, remaining_tokens[:i])],
-                        remaining_tokens[i:]
-                    ))
-        else:
-            segment_possibilities = possibilities_after_round
-
-    fully_matched_segments = [(matched, remaining)
-                              for (matched, remaining)
-                              in segment_possibilities
-                              if len(remaining) == 0]
-
-    resolved_fits = []
-    for fit, _ in fully_matched_segments:
-        resolved_fit = resolve_fit(knowledge, fit, remaining_recursions)
-        if resolved_fit is not None:
-            resolved_fits.append(resolved_fit)
-
-    if len(resolved_fits) == 0:
-        return None
-
-    return resolved_fits[0], ast
--- a/naive-nlu/tree_nlu/session/org_mode.py
+++ b/naive-nlu/tree_nlu/session/org_mode.py
@ -0,0 +1,79 @@
+import logging
+import datetime
+
+SESSION = None
+
+def __gen_session_name__():
+    '''Build the default session file name from the current UTC time.'''
+    # `utcnow()` is deprecated, an aware UTC datetime formats the same fields
+    now = datetime.datetime.now(datetime.timezone.utc)
+    return "treeNLU-session-{}.org".format(
+        now.strftime("%y_%m_%d %H:%M:%S_%f"))
+
+
+def create_global_session(fname):
+    '''Replace the global session with a new one written to `fname`.'''
+    global SESSION
+    new_session = OrgModeSession(fname)
+    SESSION = new_session
+
+
+def global_session():
+    '''
+    Return the global session.
+
+    If it has not been created yet a warning is logged and a session with
+    a generated file name is created.
+    '''
+    if SESSION is None:
+        session_name = __gen_session_name__()
+        # `logging.warn` is a deprecated alias of `logging.warning`
+        logging.warning("Session not created, saved on {}".format(session_name))
+        create_global_session(session_name)
+
+    assert(SESSION is not None)
+    return SESSION
+
+
+def get_header():
+    '''Build the comment line, with the current UTC time, that starts a session file.'''
+    # `utcnow()` is deprecated, an aware UTC datetime formats the same fields
+    now = datetime.datetime.now(datetime.timezone.utc)
+    return ("# Ran on {}\n".format(
+        now.strftime("%y/%m/%d %H:%M:%S.%f")))
+
+class LevelContext:
+    '''Context manager that calls `increaser` on entry and `decreaser` on exit.'''
+
+    def __init__(self, increaser, decreaser):
+        self.increaser, self.decreaser = increaser, decreaser
+
+    def __enter__(self):
+        self.increaser()
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        # Nothing is returned, so the exceptions raised on the block propagate
+        self.decreaser()
+
+
+class OrgModeSession:
+    '''
+    Session log written as an org-mode outline.
+
+    `log` writes headlines (with as many `*` as the current level plus one)
+    and `annotate` writes indented text under the current headline.
+    '''
+
+    def __init__(self, fname):
+        # The logged messages contain non-ASCII characters, so the encoding
+        # is fixed instead of depending on the locale of the machine
+        self.f = open(fname, 'wt', encoding='utf-8')
+        self.level = 0
+        # Set after leaving a level, makes the next annotation start with
+        # a `---` headline
+        self.dirty = False
+
+        self.f.write(get_header())
+
+    def annotate(self, annotation):
+        '''Write `annotation` as indented text at the current level.'''
+        if self.dirty:
+            self.f.write("{indentation} {data}\n".format(
+                indentation='*' * (self.level + 1),
+                data="---"))
+            self.dirty = False
+
+        self.f.write("{indentation} {data}\n".format(
+            indentation=' ' * (self.level + 2 + 1),
+            data=annotation))
+
+    def log(self, string):
+        '''
+        Write `string` as a headline at the current level.
+
+        Returns a context manager that nests what is logged inside of it
+        one level deeper.
+        '''
+        self.f.write("{indentation} {data}\n".format(
+            indentation='*' * (self.level + 1),
+            data=string))
+        self.dirty = False
+
+        return LevelContext(self.inc_level, self.dec_level)
+
+    def inc_level(self):
+        '''Go one level deeper.'''
+        self.level += 1
+
+    def dec_level(self):
+        '''Go back one level and mark the session as dirty.'''
+        self.level -= 1
+        self.dirty = True
+
+    def close(self):
+        '''Close the session file.'''
+        self.f.close()
--- a/naive-nlu/tree_nlu/test.py
+++ b/naive-nlu/tree_nlu/test.py
@ -1,157 +1,50 @@
-import json
+import traceback
 import logging
+from .session import org_mode

-logging.getLogger().setLevel(logging.INFO)
+from .tests import tokenization
+from .tests import basic
+from .tests import gac_100
+from .tests import gac_extension

-from .knowledge_base import KnowledgeBase
-from .modifiable_property import is_modifiable_property
+logging.getLogger().setLevel(logging.ERROR)

-examples = [
-    {
-        "text": "icecream is cold",
-        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
-    },
-    {
-        "text": "is icecream cold?",
-        "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold'))
-    },
-    {
-        "text": "lava is dangerous",
-        "parsed": ("exists-property-with-value", 'lava', 'dangerous')
-    },
-    {
-        "text": "is lava dangerous?",
-        "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
-    },
-    {
-        "text": "earth is a planet",
-        "parsed": ("pertenence-to-group", 'earth', 'planet'),
-    },
-    {
-        "text": "io is a moon",
-        "parsed": ("pertenence-to-group", 'io', 'moon'),
-    },
-    {
-        "text": "is earth a moon?",
-        "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
-    },
-    {
-        "text": "Green is a color",
-        "parsed": ("pertenence-to-group", 'green', 'color'),
-    },
-    {
-        "text": "a plane can fly",
-        "parsed": ("has-capacity", 'plane', 'fly')
-    },
-    {
-        "text": "a wale can swim",
-        "parsed": ("has-capacity", 'wale', 'swim')
-    },
-    {
-        "text": "if earth is a planet, it is big",
-        "parsed": ("implies",
-                   ("pertenence-to-group", 'earth', 'planet'),
-                   ("exists-property-with-value", 'earth', 'big')),
-    },
-]
-
-base_knowledge = {
-    'icecream': {
-        "groups": set(['noun', 'object', 'comestible', 'sweet']),
-    },
-    'lava': {
-        "groups": set(['noun', 'object']),
-    },
-    'earth': {
-        "groups": set(['noun', 'object', 'planet']),
-    },
-    'io': {
-        "groups": set(['noun', 'object']),
-    },
-    'green': {
-        "groups": set(['noun', 'color', 'concept']),
-    },
-    'plane': {
-        "groups": set(['noun', 'object', 'vehicle', 'fast']),
-    },
-    'car': {
-        "groups": set(['noun', 'object', 'vehicle', 'slow-ish']),
-    },
-    'wale': {
-        "groups": set(['noun', 'object', 'living-being']),
-    },
-    'cold': {
-        "groups": set(['property', 'temperature']),
-        "as_property": "temperature",
-    },
-    'dangerous': {
-        "groups": set(['property']),
-        "as_property": "safety",
-    },
-    'planet': {
-        "groups": set(['noun', 'group']),
-    },
-    'moon': {
-        "groups": set(['noun', 'group']),
-    },
-    'color': {
-        "groups": set(['property', 'group']),
-    },
-    'fly': {
-        "groups": set(['verb']),
-    },
-    'swim': {
-        "groups": set(['verb']),
-    },
-}
+# (display name, module) pairs; `main` calls each module's `main()` in this
+# order and reports the outcome under the display name.
+tests = (
+    ("tokenization", tokenization),
+    ("basic", basic),
+    ("gac 100", gac_100),
+    ("gac+", gac_extension),
+)


-def test_assumption(expectedResponse, knowledge, query):
-    logging.info("Query: {}".format(query['text']))
-    logging.info("Expected: {}".format(expectedResponse))
-
-    result, abstract_tree, diff = knowledge.process(query['text'])
-    end_result = result.getter() if is_modifiable_property(result) else result
-
-    logging.info("\x1b[0;3{}mResult: {}\x1b[0m".format("1" if end_result != expectedResponse else "2", end_result))
-    assert(end_result == expectedResponse)
+def gen_session_name():
+    '''Return the fixed file name used for the test run's session log.'''
+    return "treeNLU-test-session.org"


 def main():
-    knowledge = KnowledgeBase(
-        knowledge=base_knowledge,
-    )
+    '''Run every registered test module and report the outcome of each.
+
+    An assertion failure is reported and the remaining tests still run; any
+    other exception is reported and re-raised. The global session is closed
+    in both cases, and the process exits with status 1 when at least one
+    test failed.
+    '''
+    org_mode.create_global_session(gen_session_name())
+    failed = False
+    try:
+        for test_name, test_module in tests:
+            try:
+                with org_mode.global_session().log(test_name):
+                    test_module.main()
+                print(" \x1b[1;32m✓\x1b[0m {}".format(test_name))
+            except AssertionError as ae:
+                detail = '\n [Assertion] {}'.format(ae.args[0]) if ae.args else ''
+                print(" \x1b[1;31m✗\x1b[0m {}{}".format(test_name, detail))
+                traceback.print_exc()
+                failed = True

-    differences = knowledge.train(examples)
+            except Exception as e:
+                print(" \x1b[1;7;31m!\x1b[0m {}\n [Exception] {}".format(test_name, e))
+                traceback.print_exc()
+                raise
+    finally:
+        # Close the session file even when an unexpected exception aborts the run.
+        org_mode.global_session().close()

-    logging.info("----")
-    logging.info(differences())
-    logging.info("----")
-
-    test_assumption(True, knowledge, {'text': 'earth is a planet'})
-    test_assumption(True, knowledge, {'text': 'is lava dangerous?'})
-    for test in [{'text': 'a bus can run'}, {'text': 'io is a moon'}]:
-        row = test['text']
-        result, inferred_tree, differences = knowledge.process(row)
-
-        logging.info("result:", result)
-        logging.info(differences())
-        logging.info("---")
-    logging.info('-----')
-    logging.info(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
-    logging.info('-----')
-
-    queryTrue = {
-        "text": "is io a moon?",
-        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
-    }
-    queryFalse = {
-        "text": "is io a planet?",
-        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
-    }
-
-    test_assumption(False, knowledge, queryFalse)
-    test_assumption(True, knowledge, queryTrue)
+    if failed:
+        # `exit` is only injected by the `site` module; raise SystemExit directly.
+        raise SystemExit(1)

 if __name__ == '__main__':
    main()
--- a/naive-nlu/tree_nlu/tests/basic.py
+++ b/naive-nlu/tree_nlu/tests/basic.py
@ -0,0 +1,166 @@
+from ..session.org_mode import global_session as session
+import json
+
+from ..knowledge_base import KnowledgeBase
+from ..modifiable_property import is_modifiable_property
+from ..utils.tokenization import train_basic_tokenization
+
+# Training samples: each entry pairs a sentence ("text") with the tree it
+# should be parsed into ("parsed"). `main` trains on them one at a time.
+examples = [
+    {
+        "text": "icecream is cold",
+        "parsed": ("exists-property-with-value", 'icecream', 'cold'),
+    },
+    {
+        "text": "is icecream cold?",
+        "parsed": ("question", ("exists-property-with-value", 'icecream', 'cold'))
+    },
+    {
+        "text": "lava is dangerous",
+        "parsed": ("exists-property-with-value", 'lava', 'dangerous')
+    },
+    {
+        "text": "is lava dangerous?",
+        "parsed": ("question", ("exists-property-with-value", 'lava', 'dangerous')),
+    },
+    {
+        "text": "earth is a planet",
+        "parsed": ("pertenence-to-group", 'earth', 'planet'),
+    },
+    {
+        "text": "io is a moon",
+        "parsed": ("pertenence-to-group", 'io', 'moon'),
+    },
+    {
+        "text": "is earth a moon?",
+        "parsed": ("question", ("pertenence-to-group", 'earth', 'moon')),
+    },
+    {
+        "text": "Green is a color",
+        "parsed": ("pertenence-to-group", 'green', 'color'),
+    },
+    {
+        "text": "a plane can fly",
+        "parsed": ("has-capacity", 'plane', 'fly')
+    },
+    {
+        "text": "a wale can swim",
+        "parsed": ("has-capacity", 'wale', 'swim')
+    },
+    # Disabled sample (an "implies" tree built from two sub-trees):
+    # {
+    #     "text": "if earth is a planet, it is big",
+    #     "parsed": ("implies",
+    #                ("pertenence-to-group", 'earth', 'planet'),
+    #                ("exists-property-with-value", 'earth', 'big')),
+    # },
+]
+
+# Initial knowledge handed to the KnowledgeBase built in `main`: one entry
+# per known word, with the groups it belongs to.
+base_knowledge = {
+    'icecream': {
+        "groups": {'noun', 'object', 'comestible', 'sweet'},
+    },
+    'lava': {
+        "groups": {'noun', 'object'},
+    },
+    'earth': {
+        "groups": {'noun', 'object', 'planet'},
+    },
+    'io': {
+        "groups": {'noun', 'object'},
+    },
+    'green': {
+        "groups": {'noun', 'color', 'concept'},
+    },
+    'plane': {
+        "groups": {'noun', 'object', 'vehicle', 'fast'},
+    },
+    'car': {
+        "groups": {'noun', 'object', 'vehicle', 'slow-ish'},
+    },
+    'wale': {
+        "groups": {'noun', 'object', 'living-being'},
+    },
+    'cold': {
+        "groups": {'property', 'temperature'},
+        "as_property": "temperature",
+    },
+    'dangerous': {
+        "groups": {'property'},
+        "as_property": "safety",
+    },
+    # NOTE(review): 'planet' used to be defined twice in this literal; the
+    # later entry ({'noun'}) silently replaced the earlier one
+    # ({'noun', 'group'}). The effective value is kept here -- confirm
+    # whether 'group' was meant to be part of it.
+    'planet': {
+        "groups": {'noun'},
+    },
+    'moon': {
+        "groups": {'noun', 'group'},
+    },
+    'color': {
+        "groups": {'property', 'group'},
+    },
+    'fly': {
+        "groups": {'verb'},
+    },
+    'bus': {
+        "groups": {'noun'},
+    },
+    'run': {
+        "groups": {'verb'},
+    },
+    'swim': {
+        "groups": {'verb'},
+    },
+}
+
+def test_assumption(expectedResponse, knowledge, query):
+    '''Process `query['text']` and check the answer against `expectedResponse`.
+
+    The expected and obtained values are annotated in the session log.
+    Raises `AssertionError` when they differ.
+    '''
+    text = query['text']
+    with session().log(text):
+        session().annotate("Expected: {}".format(expectedResponse))
+
+        result, _tree, _diff = knowledge.process(text)
+        # A modifiable property wraps its value behind `getter()`.
+        if is_modifiable_property(result):
+            end_result = result.getter()
+        else:
+            end_result = result
+
+        session().annotate("Result: {}".format(end_result))
+        if end_result != expectedResponse:
+            message = '{} is not {}'.format(end_result, expectedResponse)
+            raise AssertionError(message)
+
+def main():
+    '''Train a knowledge base on `examples` and verify a few queries.
+
+    Returns the trained `KnowledgeBase`.
+    '''
+    knowledge = KnowledgeBase(knowledge=base_knowledge)
+    train_basic_tokenization(knowledge)
+
+    # Train sentence by sentence so each one gets its own session entry.
+    for sample in examples:
+        with session().log(sample['text']):
+            differences = knowledge.train([sample])
+
+    session().annotate("----")
+    session().annotate(differences())
+    session().annotate("----")
+
+    for sentence in ('earth is a planet', 'is lava dangerous?'):
+        test_assumption(True, knowledge, {'text': sentence})
+
+    # These sentences are only processed and logged, not checked.
+    for row in ('a bus can run', 'io is a moon'):
+        result, _inferred_tree, differences = knowledge.process(row)
+
+        session().annotate("result: {}".format(result))
+        session().annotate(differences())
+        session().annotate("---")
+
+    session().annotate('-----')
+    session().annotate(json.dumps(sorted(knowledge.knowledge.keys()), indent=4))
+    session().annotate('-----')
+
+    test_assumption(False, knowledge, {
+        "text": "is io a planet?",
+        "parsed": ("question", ("pertenence-to-group", "io", "planet"))
+    })
+    test_assumption(True, knowledge, {
+        "text": "is io a moon?",
+        "parsed": ("question", ("pertenence-to-group", "io", "moon"))
+    })
+    return knowledge
--- a/naive-nlu/tree_nlu/tests/gac_100.py
+++ b/naive-nlu/tree_nlu/tests/gac_100.py
@ -0,0 +1,736 @@
+from ..session.org_mode import global_session as session
+from ..knowledge_base import KnowledgeBase
+from ..utils.visuals import show_progbar
+from ..visualization import show_knowledge
+from ..utils.tokenization import train_basic_tokenization
+
+def _assert(args):
+    '''Raise `AssertionError` unless `args` is truthy.'''
+    # Raised explicitly: a bare `assert` is compiled out under `python -O`,
+    # which would turn every check built on this helper into a no-op.
+    if not args:
+        raise AssertionError()
+
+def _assert_msg(args, msg):
+    '''Raise `AssertionError(msg)` unless `args` is truthy.'''
+    # Raised explicitly: a bare `assert` is compiled out under `python -O`.
+    if not args:
+        raise AssertionError(msg)
+
+# Samples consumed by `main` as (example_type, data) pairs:
+#  - 'full_example': `affirmation` is trained with the inner tree of
+#    `parsed`, then `text` is trained with the whole `parsed` tree and
+#    processed; the `after_execution` callables are called with the
+#    knowledge base and the result is compared with `answer`.
+#  - 'text_example': `affirmation` and `question` are only processed (no
+#    parsed tree is given); the question's result is compared with `answer`.
+# NOTE(review): most commented-out entries below are bare dicts rather than
+# (example_type, data) pairs, so they need wrapping before being re-enabled.
+examples = [
+    ('full_example',
+     {
+         "text": "is icecream cold?",
+         "affirmation": "icecream is cold",
+         "parsed": ("question",
+                    ("exists-property-with-value", 'icecream', 'cold')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('cold' in knowledge.knowledge['icecream']['property'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "is earth a planet?",
+         "affirmation": "earth is a planet",
+         "parsed": ("question",
+                    ("pertenence-to-group", 'earth', 'planet')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('planet' in knowledge.knowledge['earth']['groups'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "Is green a color?",
+         "affirmation": "green is a color",
+         "parsed": ("question",
+                    ("pertenence-to-group", 'green', 'color')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('color' in knowledge.knowledge['green']['groups'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "do airplanes fly?",
+         "affirmation": "airplanes fly",
+         "parsed": ("question",
+                    ("has-capacity", 'plane', 'fly')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('fly' in knowledge.knowledge['plane']['capacities'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "Is it hot during the summer?",
+         "affirmation": "it is hot during summer",
+         "parsed": ("question",
+                    ("implies", 'summer', 'hot')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('hot' in knowledge.knowledge['summer']['implications'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "is chile in south america ?",
+         "affirmation": "chile is in south america",
+         "parsed": ("question",
+                    ("property-has-value", 'chile', 'location', 'south america')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('south america' in knowledge.knowledge['chile']['location'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "Was Socrates a man?",
+         "affirmation": "Socrates was a man",
+         "parsed": ("question",
+                    ("pertenence-to-group", 'socrates', 'man')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('man' in knowledge.knowledge['socrates']['groups'])
+         ),],
+     }),
+    ('full_example',
+     {
+         "text": "Computers use electricity?",
+         "affirmation": "Computers use electricity",
+         "parsed": ("question",
+                    ('perform-verb-over-object', 'computers', 'use', 'electricity')),
+         "answer": True,
+         "after_execution": [(
+             lambda knowledge: _assert('electricity' in knowledge.knowledge['computers']['performs-over']['use'])
+         ),],
+     }),
+    # ('full_example',
+    #  {
+    #      "text": "The dominant language in france is french?",
+    #      "affirmation": "The dominant language in france is french",
+    #      "parsed": ("question",
+    #                 ("property-has-value", "france", "dominant-language", "french")),
+    #      "answer": True,
+    #  }),
+    # {
+    #     "text": "was abraham lincoln once president of the united states?",
+    #     "affirmation": "was abraham lincoln once president of the united states?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    ('text_example',
+     {
+         "question": "is milk white?",
+         "affirmation": "milk is white",
+         "answer": True,
+     }),
+    # {
+    #     "text": "do people have emotions?",
+    #     "affirmation": "do people have emotions?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "do objects appear smaller as they move away from you?",
+    #     "affirmation": "do objects appear smaller as they move away from you?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Does the human species have a male and female gender?",
+    #     "affirmation": "Does the human species have a male and female gender?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is a mountain mostly made of rock?",
+    #     "affirmation": "Is a mountain mostly made of rock?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is sun microsystems a computer company?",
+    #     "affirmation": "is sun microsystems a computer company?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do you see with your eyes and smell with your nose?",
+    #     "affirmation": "Do you see with your eyes and smell with your nose?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is smoking bad for your health?",
+    #     "affirmation": "Is smoking bad for your health?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Does a dog have four legs?",
+    #     "affirmation": "Does a dog have four legs?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do mammals have hearts?",
+    #     "affirmation": "Do mammals have hearts?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is the Earth a planet?",
+    #     "affirmation": "is the Earth a planet?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # ('text_example',
+    #  {
+    #      "question": "is water a liquid?",
+    #      "affirmation": "water is a liquid",
+    #      "answer": True,
+    #  }),
+    # {
+    #     "text": "Is Bugs Bunny a cartoon character?",
+    #     "affirmation": "Is Bugs Bunny a cartoon character?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do Humans communicate by Telephone?",
+    #     "affirmation": "Do Humans communicate by Telephone?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is beer a drink ?",
+    #     "affirmation": "is beer a drink ?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "are there 12 months in a year?",
+    #     "affirmation": "are there 12 months in a year?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "does the sun hurt your eyes when you look at it?",
+    #     "affirmation": "does the sun hurt your eyes when you look at it?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do most cars have doors?",
+    #     "affirmation": "Do most cars have doors?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is orange both a fruit and a colour?",
+    #     "affirmation": "is orange both a fruit and a colour?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is water a necessity?",
+    #     "affirmation": "Is water a necessity?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do CDs have better quality sound than Cassettes?",
+    #     "affirmation": "Do CDs have better quality sound than Cassettes?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "do animals die?",
+    #     "affirmation": "do animals die?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is the arctic cold?",
+    #     "affirmation": "Is the arctic cold?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do people have 2 eyes?",
+    #     "affirmation": "Do people have 2 eyes?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "does a person have a brain?",
+    #     "affirmation": "does a person have a brain?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is the rain wet?",
+    #     "affirmation": "Is the rain wet?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is division a mathematical operation?",
+    #     "affirmation": "Is division a mathematical operation?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is 400 greater than 399?",
+    #     "affirmation": "is 400 greater than 399?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is magenta a color?",
+    #     "affirmation": "is magenta a color?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are books educational?",
+    #     "affirmation": "Are books educational?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Was the Great Wall of China built by humans?",
+    #     "affirmation": "Was the Great Wall of China built by humans?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are pianos musical instruments?",
+    #     "affirmation": "Are pianos musical instruments?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Has Bill Clinton been President of the United States?",
+    #     "affirmation": "Has Bill Clinton been President of the United States?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is a whale a mammal?",
+    #     "affirmation": "Is a whale a mammal?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are lemons yellow?",
+    #     "affirmation": "Are lemons yellow?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is the South Pole cold?",
+    #     "affirmation": "Is the South Pole cold?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is Africa warm?",
+    #     "affirmation": "Is Africa warm?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is Antarctica cold?",
+    #     "affirmation": "Is Antarctica cold?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is rock is generally harder than wood?",
+    #     "affirmation": "Is rock is generally harder than wood?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do dogs chase cats?",
+    #     "affirmation": "Do dogs chase cats?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "can humans die from cold temperatures?",
+    #     "affirmation": "can humans die from cold temperatures?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "do people enjoy conversation?",
+    #     "affirmation": "do people enjoy conversation?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is Bill Clinton the President of the United States?",
+    #     "affirmation": "Is Bill Clinton the President of the United States?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are books a good source of information?",
+    #     "affirmation": "Are books a good source of information?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "are friends different than enemies?",
+    #     "affirmation": "are friends different than enemies?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "are people alive?",
+    #     "affirmation": "are people alive?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do triangles have 3 sides?",
+    #     "affirmation": "Do triangles have 3 sides?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is Ice cream cold?",
+    #     "affirmation": "Is Ice cream cold?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are all sides of a square the same length?",
+    #     "affirmation": "Are all sides of a square the same length?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do all people eat food?",
+    #     "affirmation": "Do all people eat food?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "do dentists repair teeth?",
+    #     "affirmation": "do dentists repair teeth?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is America bigger than Japan?",
+    #     "affirmation": "Is America bigger than Japan?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do all triangles have three sides?",
+    #     "affirmation": "Do all triangles have three sides?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "A grocery store sales food?",
+    #     "affirmation": "A grocery store sales food?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Does a sunburn cause pain?",
+    #     "affirmation": "Does a sunburn cause pain?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is a computer an invention?",
+    #     "affirmation": "Is a computer an invention?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "have humans visited the moon?",
+    #     "affirmation": "have humans visited the moon?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are there people in India?",
+    #     "affirmation": "Are there people in India?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Was Einstein a genius?",
+    #     "affirmation": "Was Einstein a genius?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are we on the planet earth?",
+    #     "affirmation": "Are we on the planet earth?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "do people comb their hair in the morning?",
+    #     "affirmation": "do people comb their hair in the morning?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Does it hurt to lose a friend?",
+    #     "affirmation": "Does it hurt to lose a friend?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are there people on the earth?",
+    #     "affirmation": "Are there people on the earth?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Was George Washington a president of the United States of America?",
+    #     "affirmation": "Was George Washington a president of the United States of America?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Does an ocean have salt water in it?",
+    #     "affirmation": "Does an ocean have salt water in it?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is night darker than day?",
+    #     "affirmation": "Is night darker than day?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Does a triangle have three sides?",
+    #     "affirmation": "Does a triangle have three sides?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are peaches fruit?",
+    #     "affirmation": "Are peaches fruit?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do people urinate?",
+    #     "affirmation": "Do people urinate?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is Germany located in Europe?",
+    #     "affirmation": "Is Germany located in Europe?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do mirrors reflect light?",
+    #     "affirmation": "Do mirrors reflect light?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are people born naked?",
+    #     "affirmation": "Are people born naked?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is it hot near the equator?",
+    #     "affirmation": "Is it hot near the equator?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is paper made from trees?",
+    #     "affirmation": "is paper made from trees?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Can a female have children?",
+    #     "affirmation": "Can a female have children?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are people born every day?",
+    #     "affirmation": "Are people born every day?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are shoes worn on the feet?",
+    #     "affirmation": "Are shoes worn on the feet?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "does it get wet when it rains?",
+    #     "affirmation": "does it get wet when it rains?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are there plants and insects in the rainforest which have no names?",
+    #     "affirmation": "Are there plants and insects in the rainforest which have no names?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do people eat pigs?",
+    #     "affirmation": "Do people eat pigs?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do businessmen wear ties?",
+    #     "affirmation": "Do businessmen wear ties?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is New York in the United States?",
+    #     "affirmation": "Is New York in the United States?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are humans more intelligent than ants?",
+    #     "affirmation": "Are humans more intelligent than ants?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are ravens black?",
+    #     "affirmation": "Are ravens black?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Are there rats on ships?",
+    #     "affirmation": "Are there rats on ships?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "are lions animals?",
+    #     "affirmation": "are lions animals?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "6 is greater than 5?",
+    #     "affirmation": "6 is greater than 5?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Is water made of hydrogen and oxygen?",
+    #     "affirmation": "Is water made of hydrogen and oxygen?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "is the sky blue on a clear day?",
+    #     "affirmation": "is the sky blue on a clear day?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+    # {
+    #     "text": "Do most people work during the day?",
+    #     "affirmation": "Do most people work during the day?",
+    #     "parsed": (),
+    #     "answer": None,
+    # },
+]
+
+# Initial knowledge handed to the KnowledgeBase built in `main`.
+base_knowledge = {
+    'summer': {'groups': {'epoch'}},
+    'fly': {'groups': {'verb'}},
+    'use': {'groups': {'verb'}},
+    'electricity': {'groups': {'power'}},
+    # Known word without any group information.
+    'airplanes': {},
+    'white': {'groups': {'property'}},
+}
+
+def main():
+    '''Run every entry of `examples` against a fresh knowledge base.
+
+    Raises `AssertionError` when an answer differs from the expected one and
+    `NotImplementedError` for an unknown example type. Returns the resulting
+    `KnowledgeBase`.
+    '''
+    knowledge = KnowledgeBase(knowledge=base_knowledge)
+    train_basic_tokenization(knowledge)
+
+    total = len(examples)
+
+    for i, (example_type, data) in enumerate(examples):
+        if example_type == 'text_example':
+            # No parsed tree is available: both sentences are only processed.
+            affirmation = data['affirmation']
+            with session().log(affirmation):
+                show_progbar(i, total, affirmation)
+                session().annotate("Processing affirmation: {}".format(affirmation))
+                _, _, _ = knowledge.process(affirmation)
+
+            question = data['question']
+            with session().log(question):
+                show_progbar(i, total, question)
+                session().annotate("Processing question : {}".format(question))
+                result, _, _ = knowledge.process(question)
+
+                if result != data['answer']:
+                    raise AssertionError('{} is not {}'.format(result, data['answer']))
+
+        elif example_type == 'full_example':
+            statement = data['affirmation']
+            # The affirmation is trained with the tree wrapped by the question.
+            affirmation = {
+                'text': statement,
+                'parsed': data['parsed'][1],
+            }
+
+            with session().log(statement):
+                show_progbar(i, total, statement)
+                knowledge.train([affirmation])
+
+            with session().log(data['text']):
+                show_progbar(i, total, data['text'])
+                differences = knowledge.train([data])
+                session().annotate(differences())
+
+                result, _, _ = knowledge.process(data['text'])
+
+            for check in data.get("after_execution", ()):
+                check(knowledge)
+
+            if result != data['answer']:
+                raise AssertionError('{} is not {}'.format(result, data['answer']))
+
+        else:
+            raise NotImplementedError('Example type: {}'.format(example_type))
+
+        # Return to the start of the line and erase it (clears the progress bar).
+        print("\r\x1b[K", end='')
+    return knowledge
+
+
+# When run as a script, run the examples and pass the resulting knowledge
+# base to `show_knowledge`.
+if __name__ == '__main__':
+    show_knowledge(main())
--- a/naive-nlu/tree_nlu/tests/gac_extension.py
+++ b/naive-nlu/tree_nlu/tests/gac_extension.py
@ -0,0 +1,26 @@
+from ..knowledge_base import KnowledgeBase
+from ..session.org_mode import global_session as session
+
+from . import gac_100
+
+
+def ask_then_learn_test(knowledge: KnowledgeBase):
+    '''
+    Ask a question, teach its answer, then ask the same question again.
+
+    "is icecream blue?" must evaluate to False before the affirmation
+    "icecream is blue" is processed, and to True afterwards. Each of the
+    three steps is logged as its own session section.
+
+    Returns the same knowledge base that was passed in.
+    '''
+    question = "is icecream blue?"
+    affirmation = "icecream is blue"
+
+    # Before the affirmation the expected answer is False.
+    with session().log(question):
+        answer_before, _, _ = knowledge.process(question)
+        assert answer_before is False
+
+    with session().log(affirmation):
+        _, _, _ = knowledge.process(affirmation)
+
+    # After the affirmation the same question must yield True.
+    with session().log(question):
+        answer_after, _, _ = knowledge.process(question)
+        assert answer_after is True
+
+    return knowledge
+
+
+def main():
+    '''
+    Run the extended tests on top of the knowledge built by gac_100.
+
+    'blue' is registered by hand as a member of the 'property' group
+    before ask_then_learn_test() is run.
+
+    Returns the resulting knowledge base, the same way the gac_100 and
+    tokenization main() functions return theirs.
+    '''
+    knowledge = gac_100.main()
+
+    knowledge.knowledge['blue'] = {'groups': {'property'}}
+    return ask_then_learn_test(knowledge)
--- a/naive-nlu/tree_nlu/tests/tokenization.py
+++ b/naive-nlu/tree_nlu/tests/tokenization.py
@ -0,0 +1,80 @@
+from ..session.org_mode import global_session as session
+from ..knowledge_base import KnowledgeBase
+from ..utils.visuals import show_progbar
+from ..visualization import show_knowledge
+
+
+def _assert(args):
+    '''Raise a bare AssertionError when `args` is falsy.'''
+    assert args
+
+
+def _assert_msg(args, msg):
+    '''Raise an AssertionError carrying `msg` when `args` is falsy.'''
+    assert args, msg
+
+
+# Tokenization cases consumed in order by main(), as (case_type, data) pairs:
+#  - 'example' entries are passed to the tokenization layer as training data.
+#  - 'test' entries are tokenized and the result is compared with "tokens".
+# Some entries also carry a "meaning" mapping; main() does not read it
+# itself, it only hands the whole dict to train().
+EXAMPLES = [
+    ('example', {
+        "text": 'cat',
+        "tokens": ['cat'],
+    }),
+    ('example', {
+        "text": 'cats',
+        "tokens": ['cats'],
+        "meaning": { 'cats': ('add-modifier', 'cat', 'plural') },
+    }),
+    ('example', {
+        "text": 'text separated by spaces',
+        "tokens": ['text', 'separated', 'by', 'spaces'],
+    }),
+    ('example', {
+        "text": 'is earth a planet?',
+        "tokens": ['is', 'earth', 'a', 'planet', '?'],
+    }),
+    ('test', {
+        "text": 'plane',
+        "tokens": ['plane'],
+    }),
+    # NOTE(review): the 'planes' test below is disabled; the reason is not
+    # recorded here.
+    # ('test', {
+    #     "text": 'planes',
+    #     "tokens": ['planes'],
+    #     "meaning": { 'planes': ('add-modifier', 'plane', 'plural') },
+    # }),
+    ('test', {
+        "text": 'some other text',
+        "tokens": ['some', 'other', 'text'],
+    }),
+    ('test', {
+        "text": 'is the sun a star?',
+        "tokens": ['is', 'the', 'sun', 'a', 'star', '?'],
+    }),
+    ('test', {
+        "text": 'sometextnotseparatedbyspaces',
+        "tokens": ['some', 'text', 'not', 'separated', 'by', 'spaces'],
+    })
+]
+
+
+def main():
+    '''
+    Train and check the tokenization layer of a fresh KnowledgeBase.
+
+    EXAMPLES is walked in order: 'example' entries are given to the
+    tokenization layer as training data, 'test' entries are tokenized and
+    compared with their expected "tokens".
+
+    Returns the knowledge base used.
+    Raises AssertionError when a tokenization differs from the expected
+    one, and NotImplementedError for an unknown case type.
+    '''
+    knowledge = KnowledgeBase()
+
+    total = len(EXAMPLES)
+
+    for i, (case_type, example) in enumerate(EXAMPLES):
+        text = example['text']
+        show_progbar(i, total, text)
+
+        if case_type == 'example':
+            with session().log(text):
+                knowledge.layers.tokenization.train(example)
+
+        elif case_type == 'test':
+            with session().log(text):
+                tokens = list(knowledge.layers.tokenization.tokenize(text))
+
+                outcome = 'Expected “{}”, found “{}”'.format(
+                    example['tokens'], tokens)
+                # Everywhere else log() is used as a `with` section and
+                # one-off messages are written with annotate(); a bare
+                # log() call would presumably never be entered.
+                session().annotate(outcome)
+                assert example['tokens'] == tokens, outcome
+
+        else:
+            # Same exception type gac_100 raises for unknown example types.
+            raise NotImplementedError(
+                'Not implemented case {}'.format(case_type))
+
+    # Wipe the progress bar line.
+    print("\r\x1b[K", end='')
+    return knowledge
--- a/naive-nlu/tree_nlu/utils/json_dumper.py
+++ b/naive-nlu/tree_nlu/utils/json_dumper.py
@ -0,0 +1,4 @@
+def dumper(obj):
+    '''
+    `default` hook for json.dumps() that makes sets serializable.
+
+    Sets and frozensets are converted to lists. Anything else that
+    reaches this hook is an object json cannot encode, so a TypeError is
+    raised, which is what the json module expects from `default`
+    functions. Returning the object unchanged would make the encoder call
+    the hook on it again and fail with a misleading
+    "Circular reference detected" error.
+    '''
+    if isinstance(obj, (set, frozenset)):
+        return list(obj)
+    raise TypeError('Object of type {} is not JSON serializable'
+                    .format(type(obj).__name__))
--- a/naive-nlu/tree_nlu/utils/tokenization.py
+++ b/naive-nlu/tree_nlu/utils/tokenization.py
@ -0,0 +1,29 @@
+from ..session.org_mode import (
+    global_session as session,
+)
+
+# Training samples passed one by one to the tokenization layer by
+# train_basic_tokenization(). Each entry is a plain dict: the parentheses
+# around the braces only group the expression, they do not build tuples.
+BASIC_TOKENIZATION_EXAMPLES = (
+    ({
+        "text": 'cat',
+        "tokens": ['cat'],
+    }),
+    ({
+        "text": 'cats',
+        "tokens": ['cats'],
+        "meaning": { 'cats': ('add-modifier', 'cat', 'plural') },
+    }),
+    ({
+        "text": 'text separated by spaces',
+        "tokens": ['text', 'separated', 'by', 'spaces'],
+    }),
+    ({
+        "text": 'is earth a planet?',
+        "tokens": ['is', 'earth', 'a', 'planet', '?'],
+    }),
+)
+
+
+def train_basic_tokenization(knowledge_base):
+    '''
+    Pass every BASIC_TOKENIZATION_EXAMPLES entry to the tokenization layer.
+
+    The whole run is logged under one 'Training basic tokenization'
+    session section.
+    '''
+    section = session().log('Training basic tokenization')
+    with section:
+        for sample in BASIC_TOKENIZATION_EXAMPLES:
+            tokenizer = knowledge_base.layers.tokenization
+            tokenizer.train(sample)
--- a/naive-nlu/tree_nlu/utils/visuals.py
+++ b/naive-nlu/tree_nlu/utils/visuals.py
@ -0,0 +1,15 @@
+def show_progbar(done, total, msg=''):
+    '''
+    Draw a 10-block progress bar, followed by `msg`, on the current line.
+
+    The line is erased before drawing and the cursor is sent back to its
+    start afterwards, so consecutive calls overwrite each other.
+
+    done  -- units of work completed so far.
+    total -- units of work expected; a non-positive total draws an empty
+             bar instead of dividing by zero.
+    msg   -- text printed to the right of the bar.
+    '''
+    total_blocks = 10
+    if total > 0:
+        blocks_done = (done * total_blocks) // total
+        # Keep the bar at a fixed width even when `done` is out of range.
+        blocks_done = max(0, min(total_blocks, blocks_done))
+    else:
+        blocks_done = 0
+    blocks_to_go = total_blocks - blocks_done
+
+    print('\r\x1b[K'  # Go to the start of the line and erase it
+          '\x1b[0m'   # Restart the "style"
+          '|'  # Put the first "|"
+          + blocks_done  * '█' # Completed blocks
+          + blocks_to_go * ' ' # Uncompleted blocks
+          + '\x1b[7m|\x1b[0m'  # End the bar
+          + ' '
+          + msg  # Add message
+          + '\r' # Go back to the start
+          , end='')
--- a/naive-nlu/tree_nlu/visualization.py
+++ b/naive-nlu/tree_nlu/visualization.py
@ -0,0 +1,8 @@
+def show_knowledge(knowledge):
+    '''Print each entry of `knowledge.knowledge`, with its key in bold.'''
+    entry_template = "\x1b[1m{}\x1b[0m {}"
+    for name, properties in knowledge.knowledge.items():
+        print(entry_template.format(name, properties))
+
+
+def show_samples(knowledge):
+    '''Print every element of `knowledge.originals`, one per line.'''
+    for sample in knowledge.originals:
+        print(format(sample))
Author	SHA1	Message	Date
kenkeiras	178dadc57f	Merge branch 'layered-model' into naive-nlu	2018-04-25 20:17:53 +02:00
kenkeiras	712503804d	Properly handle solutions not found.	2018-04-24 23:12:14 +02:00
kenkeiras	1ded981099	Pass test using layer structure.	2018-04-24 23:01:36 +02:00
kenkeiras	a444766c7c	Exploration of layers for tokenization and parsing.	2018-04-23 22:48:10 +02:00
kenkeiras	c18c9b8cb1	Merge branch 'learn-tokenization' into naive-nlu	2018-04-16 00:00:12 +02:00
kenkeiras	8b67b96d2f	Separate tokenization module.	2018-04-15 22:15:28 +02:00
kenkeiras	1306306723	Pass tests using tokenization.	2018-04-15 21:10:49 +02:00
kenkeiras	45cc3a8a31	Train basic tokenization before gac_100 tests.	2018-04-15 20:47:08 +02:00
kenkeiras	6c46f9db4b	Fix element_matches_bugs when element is a dictionary.	2018-04-15 20:46:43 +02:00
kenkeiras	ee5492e69d	Log tokenization options in a section separated from results.	2018-04-15 20:45:59 +02:00
kenkeiras	d63781a0d2	Learn from tokenizations inferred.	2018-04-15 20:45:24 +02:00
kenkeiras	6fb1e1e649	Replace debugging prints by session logs.	2018-04-15 20:21:09 +02:00
kenkeiras	79034f85a9	Move to a chaining model for tokenization. This model also explores more tokenization possibilities. With this, the tokenization tests are passed.	2018-04-15 20:06:23 +02:00
kenkeiras	998a183fd2	Dig deeper in cut-by-token approach.	2018-04-15 17:47:04 +02:00
kenkeiras	d601ae3f83	Increase logging, add failing tokenization tests.	2018-04-15 17:08:01 +02:00
kenkeiras	40b63128af	Save structural elements.	2018-04-15 17:07:29 +02:00
kenkeiras	fc37450565	Add (non-passing) tokenization.	2018-04-15 16:41:07 +02:00
kenkeiras	75174e1736	Increase exploration, remove unnecessary initial knowledge.	2017-10-02 23:37:22 +02:00
kenkeiras	a0810dd0e8	Add session files base implementation.	2017-10-01 20:59:11 +02:00
kenkeiras	13ed48c6b4	Clearer candidate results logging.	2017-10-01 20:58:45 +02:00
kenkeiras	0e41a98857	Fix the dirty log level flag.	2017-10-01 20:55:55 +02:00
kenkeiras	c1055bd703	Make timestamp header a default.	2017-10-01 20:52:58 +02:00
kenkeiras	75f00e7171	Fix session logging level bug.	2017-10-01 20:49:28 +02:00
kenkeiras	2f76cdc260	Add more logging context.	2017-10-01 20:46:48 +02:00
kenkeiras	6278cc43f7	Remove color strings from session logs.	2017-10-01 20:37:51 +02:00
kenkeiras	bb7d438e0d	Add session context, increase logging.	2017-10-01 17:10:50 +02:00
kenkeiras	359f858c39	Add session files base concept.	2017-09-30 01:32:04 +02:00
kenkeiras	58fb9fb188	Add extended gac tests.	2017-09-30 00:58:24 +02:00
kenkeiras	2c36dd9b7e	Save original examples (needed on show_samples).	2017-09-30 00:55:44 +02:00
kenkeiras	d607b2210e	Avoid crashing when a fit is not found.	2017-09-30 00:55:01 +02:00
kenkeiras	542c4fca4b	Add more specific errors on the cli module.	2017-09-30 00:54:09 +02:00
kenkeiras	3abbd40b26	Add show_sample visualization.	2017-09-30 00:53:42 +02:00
kenkeiras	23256b945d	Reduce unneeded base knowledge.	2017-09-22 01:03:47 +02:00
kenkeiras	91dd5a9c6c	Separate visualization module.	2017-09-22 01:00:39 +02:00
kenkeiras	e6fbbc19e0	Adjust info logging level.	2017-09-22 01:00:09 +02:00
kenkeiras	379855766c	Fix bug in gac_100 test.	2017-09-22 00:52:04 +02:00
kenkeiras	fde31b69a8	Fix reference-after-use bug.	2017-09-20 21:24:40 -04:00
kenkeiras	4b12bc589e	Accept new words when we don't know what they mean.	2017-09-20 21:20:50 -04:00
kenkeiras	16a895dc22	Fix debugging logging formats.	2017-09-20 21:11:15 -04:00
kenkeiras	4e8f82c0a5	Add debug command.	2017-09-20 21:04:15 -04:00
kenkeiras	d23329b019	Add cli base.	2017-09-17 22:01:59 -04:00
kenkeiras	fae11eb875	Change prints into loggings.	2017-09-17 21:28:25 -04:00
kenkeiras	b16df096d7	Add new GAC100 test example.	2017-06-12 16:02:58 +02:00
kenkeiras	dad1ef6fd8	Add the perform-verb-over-object operator. To support the tests for this, the remixing and matching mechanisms had to be modified.	2017-06-04 18:58:42 +02:00
kenkeiras	46dcf55793	Expand the test cases applied in the NLU approach.	2017-06-04 18:57:02 +02:00
kenkeiras	b42bf37f77	Improve the remixing and fitting mechanisms.	2017-06-04 18:55:45 +02:00
kenkeiras	0fbb9238eb	Extract more contextual info from the words. A property dictionary is now only to be considered equal to a word when it shares at least one group, or neither has groups.	2017-06-04 18:53:17 +02:00
kenkeiras	d3b604efca	Visualize & use more data. * Visualize more steps in the process. * Collect more possibilities in get_fit().	2017-05-29 23:23:53 +02:00
kenkeiras	9d49d00688	Work in progress. * Test * More debugging * Base concept	2017-05-28 22:39:07 +02:00
kenkeiras	aa7bee4c8b	Add perform-verb-over-object and (failing) test.	2017-05-24 23:54:58 +02:00
kenkeiras	e0a5f02c34	Add progress bar visuals to tests.	2017-05-24 22:53:36 +02:00
kenkeiras	8e304b2a09	Always create the "groups" set for new elements. This allows a smaller initial knowledge base.	2017-05-24 22:26:30 +02:00
kenkeiras	a7f70d2888	Unlock 7th GAC 100.	2017-05-24 22:17:57 +02:00
kenkeiras	9ed43aa362	Return None when a matrix cannot be applied.	2017-05-24 22:16:36 +02:00
kenkeiras	89b281fd6f	Lean on knowledge too when defining bounds.	2017-05-24 22:16:03 +02:00
kenkeiras	7cdf8a310d	Unroll get_matching last list-comprehension.	2017-05-24 22:10:19 +02:00
kenkeiras	e6cbb54382	Also use matching tokens to score tree similarity.	2017-05-24 22:09:28 +02:00
kenkeiras	75d690120b	Improve error reporting on tests.	2017-05-24 22:06:18 +02:00
kenkeiras	cbeefcf76b	Identify group "concepts" on-flight.	2017-05-24 22:05:46 +02:00
kenkeiras	2bfe676b2d	Integrate the knowledge ASAP. If we do this before the parsing we can leverage that semantics in the matching phase.	2017-05-24 22:03:37 +02:00
kenkeiras	02f909269a	Use after_execution mechanism for test asserts.	2017-05-24 21:09:23 +02:00
kenkeiras	4d7afb0174	Add set-capable json dumper.	2017-05-24 20:42:54 +02:00
kenkeiras	e51ba71ec5	Add after_execution mechanism to gac100 test.	2017-05-24 20:42:45 +02:00
kenkeiras	a99449c04a	Add property-has-value example.	2017-05-24 20:42:15 +02:00
kenkeiras	bbba6b75e1	Make remix model more powerful. Accept elements in the remix that are not present in the subtrees.	2017-05-24 20:30:50 +02:00
kenkeiras	d029ecd91d	Implication example.	2017-05-24 20:13:42 +02:00
kenkeiras	22534160c9	On fail or exception exit with non-zero code. Also, on exceptions print the exception stacktrace.	2017-05-23 23:34:33 +02:00
kenkeiras	0b52ade6b5	Small colorization on the test interface.	2017-05-23 23:32:41 +02:00
kenkeiras	3cfc03373f	Use {set} notation for sets.	2017-05-23 23:29:34 +02:00
kenkeiras	e6e8146478	Allow learning from unparsed data in tests.	2017-05-23 23:28:01 +02:00
kenkeiras	586ac76d1f	Default to ERROR logging on tests.	2017-05-23 23:18:02 +02:00
kenkeiras	460ad73bba	Handle the possibility of remixes not working.	2017-05-23 23:17:46 +02:00
kenkeiras	6693b7deb0	Remove need from `as_property` info. Probably this can be improved upon if the data is later analyzed with it's similars.	2017-05-23 23:16:21 +02:00
kenkeiras	d6628101de	Base gac 100.	2017-05-23 22:16:27 +02:00
kenkeiras	23ae882161	Separated basic test.	2017-05-23 21:57:51 +02:00