From e73ce5d480ec2a963409cd0e4e935ab86d3825bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Mon, 2 Nov 2020 20:39:16 +0100
Subject: [PATCH] Format with black, use tokens for markup segmentation.

 - Don't use trees in the first instance, as interleaved markup would make them lossy.
---
 org_dom/org_dom.py            | 526 ++++++++++++++++++----------------
 org_dom/utils.py              |   4 +-
 tests/02-markup.org           |   2 +
 tests/test_dom.py             |  36 ++-
 tests/utils/dom_assertions.py |  62 ++--
 5 files changed, 356 insertions(+), 274 deletions(-)
diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py
index ff5fdac..98fa5ff 100644
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -1,11 +1,12 @@
+import collections
 import logging
 import re
-import collections
+from enum import Enum
 from typing import List, Tuple
 
 BASE_ENVIRONMENT = {
-    'org-footnote-section': 'Footnotes',
-    'org-options-keywords': (
+    "org-footnote-section": "Footnotes",
+    "org-options-keywords": (
         "ARCHIVE:",
         "AUTHOR:",
         "BIND:",
@@ -30,52 +31,103 @@ BASE_ENVIRONMENT = {
         "SEQ_TODO:",
         "SETUPFILE:",
         "STARTUP:",
-        "TAGS:"
-        "TITLE:",
+        "TAGS:" "TITLE:",
         "TODO:",
         "TYP_TODO:",
         "SELECT_TAGS:",
-        "EXCLUDE_TAGS:"
+        "EXCLUDE_TAGS:",
     ),
 }
 
 
-HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
-KEYWORDS_RE = re.compile(r'^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$')
-PROPERTY_DRAWER_RE = re.compile(r'^(?P<indentation>\s*):PROPERTIES:(?P<end_indentation>\s*)$')
-DRAWER_END_RE = re.compile(r'^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$')
-NODE_PROPERTIES_RE = re.compile(r'^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$')
-RAW_LINE_RE = re.compile(r'^\s*([^\s#:*]|$)')
-BASE_TIME_STAMP_RE = r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[^ ]+)( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?'
+HEADLINE_RE = re.compile(r"^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$")
+KEYWORDS_RE = re.compile(
+    r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
+)
+PROPERTY_DRAWER_RE = re.compile(
+    r"^(?P<indentation>\s*):PROPERTIES:(?P<end_indentation>\s*)$"
+)
+DRAWER_END_RE = re.compile(r"^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$")
+NODE_PROPERTIES_RE = re.compile(
+    r"^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$"
+)
+RAW_LINE_RE = re.compile(r"^\s*([^\s#:*]|$)")
+BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[^ ]+)( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?"
 
-ACTIVE_TIME_STAMP_RE = re.compile(r'<{}>'.format(BASE_TIME_STAMP_RE))
-INACTIVE_TIME_STAMP_RE = re.compile(r'\[{}\]'.format(BASE_TIME_STAMP_RE))
+ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
+INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))
 
 # BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
 #                       r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
 
-Headline = collections.namedtuple('Headline', ('start_line', 'depth',
-                                               'orig',
-                                               'properties', 'keywords',
-                                               'priority_start', 'priority',
-                                               'title_start', 'title',
-                                               'tags_start', 'tags',
-                                               'contents',
-                                               'children',
-                                               'structural',
-))
+Headline = collections.namedtuple(
+    "Headline",
+    (
+        "start_line",
+        "depth",
+        "orig",
+        "properties",
+        "keywords",
+        "priority_start",
+        "priority",
+        "title_start",
+        "title",
+        "tags_start",
+        "tags",
+        "contents",
+        "children",
+        "structural",
+    ),
+)
 
-RawLine = collections.namedtuple('RawLine', ('linenum', 'line'))
-Keyword = collections.namedtuple('Keyword', ('linenum', 'match', 'key', 'value', 'options'))
-Property = collections.namedtuple('Property', ('linenum', 'match', 'key', 'value', 'options'))
+RawLine = collections.namedtuple("RawLine", ("linenum", "line"))
+Keyword = collections.namedtuple(
+    "Keyword", ("linenum", "match", "key", "value", "options")
+)
+Property = collections.namedtuple(
+    "Property", ("linenum", "match", "key", "value", "options")
+)
 
 # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
 # @TODO Consider recurrence annotations
-TimeRange = collections.namedtuple('TimeRange', ('start_time', 'end_time'))
-Timestamp = collections.namedtuple('Timestamp', ('active', 'year', 'month', 'day', 'dow', 'hour', 'minute'))
+TimeRange = collections.namedtuple("TimeRange", ("start_time", "end_time"))
+Timestamp = collections.namedtuple(
+    "Timestamp", ("active", "year", "month", "day", "dow", "hour", "minute")
+)
+
+
+class MarkerType(Enum):
+    NO_MODE = 0b0
+    BOLD_MODE = 0b1
+    CODE_MODE = 0b10
+    ITALIC_MODE = 0b100
+    STRIKE_MODE = 0b1000
+    UNDERLINED_MODE = 0b10000
+    VERBATIM_MODE = 0b100000
+
+MARKERS = {
+    "*": MarkerType.BOLD_MODE,
+    "~": MarkerType.CODE_MODE,
+    "/": MarkerType.ITALIC_MODE,
+    "+": MarkerType.STRIKE_MODE,
+    "_": MarkerType.UNDERLINED_MODE,
+    "=": MarkerType.VERBATIM_MODE,
+}
+
+ModeToMarker = {}
+
+for tok, mode in MARKERS.items():
+    ModeToMarker[mode] = tok
+
+MarkerToken = collections.namedtuple("MarkerToken", ("closing", "tok_type"))
+
+BEGIN_PROPERTIES = "OPEN_PROPERTIES"
+END_PROPERTIES = "CLOSE_PROPERTIES"
+
+def token_from_type(tok_type):
+    print(ModeToMarker, tok_type)
+    return ModeToMarker[tok_type]
 
-BEGIN_PROPERTIES = 'OPEN_PROPERTIES'
-END_PROPERTIES = 'CLOSE_PROPERTIES'
 
 def parse_org_time(value):
     if m := ACTIVE_TIME_STAMP_RE.match(value):
@@ -85,29 +137,57 @@ def parse_org_time(value):
     else:
         return None
 
-    if m.group('end_hour'):
-        return TimeRange(Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute'))),
-                         Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('end_hour')), int(m.group('end_minute'))))
-    return Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute')))
+    if m.group("end_hour"):
+        return TimeRange(
+            Timestamp(
+                active,
+                int(m.group("year")),
+                int(m.group("month")),
+                int(m.group("day")),
+                m.group("dow"),
+                int(m.group("start_hour")),
+                int(m.group("start_minute")),
+            ),
+            Timestamp(
+                active,
+                int(m.group("year")),
+                int(m.group("month")),
+                int(m.group("day")),
+                m.group("dow"),
+                int(m.group("end_hour")),
+                int(m.group("end_minute")),
+            ),
+        )
+    return Timestamp(
+        active,
+        int(m.group("year")),
+        int(m.group("month")),
+        int(m.group("day")),
+        m.group("dow"),
+        int(m.group("start_hour")),
+        int(m.group("start_minute")),
+    )
+
 
 def timestamp_to_string(ts):
-    date = '{year}-{month:02d}-{day:02d}'.format(
-        year=ts.year,
-        month=ts.month,
-        day=ts.day
+    date = "{year}-{month:02d}-{day:02d}".format(
+        year=ts.year, month=ts.month, day=ts.day
     )
     if ts.dow:
-        date = date + ' ' + ts.dow
+        date = date + " " + ts.dow
 
     if ts.hour is not None:
-        base = '{date} {hour:02}:{minute:02d}'.format(date=date, hour=ts.hour, minute=ts.minute)
+        base = "{date} {hour:02}:{minute:02d}".format(
+            date=date, hour=ts.hour, minute=ts.minute
+        )
     else:
         base = date
 
     if ts.active:
-        return '<{}>'.format(base)
+        return "<{}>".format(base)
     else:
-        return '[{}]'.format(base)
+        return "[{}]".format(base)
+
 
 def get_raw(doc):
     if isinstance(doc, str):
@@ -115,6 +195,7 @@ def get_raw(doc):
     else:
         return doc.get_raw()
 
+
 class Line:
     def __init__(self, linenum, contents):
         self.linenum = linenum
@@ -127,7 +208,8 @@ class Line:
                 rawchunks.append(chunk)
             else:
                 rawchunks.append(chunk.get_raw())
-        return ''.join(rawchunks) + '\n'
+        return "".join(rawchunks) + "\n"
+
 
 class Text:
     def __init__(self, contents, line):
@@ -135,104 +217,122 @@ class Text:
         self.linenum = line
 
     def get_raw(self):
-        raw = ''.join(self.contents)
-        return raw
+        contents = []
+        for chunk in self.contents:
+            if isinstance(chunk, str):
+                contents.append(chunk)
+            else:
+                assert isinstance(chunk, MarkerToken)
+                contents.append(token_from_type(chunk.tok_type))
+        return ''.join(contents)
+
 
 class Bold:
-    Marker = '*'
+    Marker = "*"
 
     def __init__(self, contents, line):
         self.contents = contents
 
     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"
 
+
 class Code:
-    Marker = '~'
+    Marker = "~"
 
     def __init__(self, contents, line):
         self.contents = contents
 
     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"
 
+
 class Italic:
-    Marker = '/'
+    Marker = "/"
 
     def __init__(self, contents, line):
         self.contents = contents
 
     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"
 
+
 class Strike:
-    Marker = '+'
+    Marker = "+"
 
     def __init__(self, contents, line):
         self.contents = contents
 
     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"
 
+
 class Underlined:
-    Marker = '_'
+    Marker = "_"
 
     def __init__(self, contents, line):
         self.contents = contents
 
     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"
 
+
 class Verbatim:
-    Marker = '='
+    Marker = "="
 
     def __init__(self, contents, line):
         self.contents = contents
 
     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"
 
 
 def is_pre(char: str) -> bool:
     if isinstance(char, str):
-        return char in '\n\r\t -({\'"'
+        return char in "\n\r\t -({'\""
     else:
         return True
 
+
 def is_marker(char: str) -> bool:
     if isinstance(char, str):
-        return char in '*=/+_~'
+        return char in "*=/+_~"
     else:
         return False
 
+
 def is_border(char: str) -> bool:
     if isinstance(char, str):
-        return char not in '\n\r\t '
+        return char not in "\n\r\t "
     else:
         return False
 
+
 def is_body(char: str) -> bool:
     if isinstance(char, str):
         return True
     else:
         return False
 
+
 def is_post(char: str) -> bool:
     if isinstance(char, str):
-        return char in '-.,;:!?\')}["'
+        return char in "-.,;:!?')}[\""
     else:
         return False
 
+
 TOKEN_TYPE_TEXT = 0
 TOKEN_TYPE_OPEN_MARKER = 1
 TOKEN_TYPE_CLOSE_MARKER = 2
 
+
 def tokenize_contents(contents: str):
     tokens = []
     last_char = None
@@ -244,17 +344,17 @@ def tokenize_contents(contents: str):
         has_changed = False
 
         if (
-                (i not in closes)
-                and is_marker(char)
-                and is_pre(last_char)
-                and ((i + 1 < len(contents))
-                     and is_border(contents[i + 1]))):
+            (i not in closes)
+            and is_marker(char)
+            and is_pre(last_char)
+            and ((i + 1 < len(contents)) and is_border(contents[i + 1]))
+        ):
 
             is_valid_mark = False
             # Check that is closed later
             text_in_line = True
             for j in range(i, len(contents) - 1):
-                if contents[j] == '\n':
+                if contents[j] == "\n":
                     if not text_in_line:
                         break
                     text_in_line = False
@@ -267,13 +367,13 @@ def tokenize_contents(contents: str):
 
             if is_valid_mark:
                 if len(text) > 0:
-                    tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
+                    tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
                     text = []
                 tokens.append((TOKEN_TYPE_OPEN_MARKER, char))
                 has_changed = True
         elif i in closes:
             if len(text) > 0:
-                tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
+                tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
                 text = []
             tokens.append((TOKEN_TYPE_CLOSE_MARKER, char))
             has_changed = True
@@ -283,156 +383,57 @@ def tokenize_contents(contents: str):
         last_char = char
 
     if len(text) > 0:
-        tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
+        tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
 
     return tokens
 
 
-def parse_contents(raw_contents:List[RawLine]):
-    NO_MODE =         0b0
-    BOLD_MODE =       0b1
-    CODE_MODE =       0b10
-    ITALIC_MODE =     0b100
-    STRIKE_MODE =     0b1000
-    UNDERLINED_MODE = 0b10000
-    VERBATIM_MODE =   0b100000
-
-    MARKERS = {
-        '*': BOLD_MODE,
-        '~': CODE_MODE,
-        '/': ITALIC_MODE,
-        '+': STRIKE_MODE,
-        '_': UNDERLINED_MODE,
-        '=': VERBATIM_MODE,
-    }
-    MODES = (
-        (BOLD_MODE, Bold),
-        (CODE_MODE, Code),
-        (ITALIC_MODE, Italic),
-        (STRIKE_MODE, Strike),
-        (UNDERLINED_MODE, Underlined),
-        (VERBATIM_MODE, Verbatim),
-    )
-    _MODES = {
-        BOLD_MODE: Bold,
-        CODE_MODE: Code,
-        ITALIC_MODE: Italic,
-        STRIKE_MODE: Strike,
-        UNDERLINED_MODE: Underlined,
-        VERBATIM_MODE: Verbatim,
-    }
-
-    mode = NO_MODE
-    escaped = False
-
-    chunk = []
-    inline = []
-    chunks = []
-
-    linenum = start_linenum = raw_contents[0].linenum
+def parse_contents(raw_contents: List[RawLine]):
     contents_buff = []
     for line in raw_contents:
         contents_buff.append(line.line)
 
-    contents = '\n'.join(contents_buff)
+    contents = "\n".join(contents_buff)
     tokens = tokenize_contents(contents)
+    current_line = raw_contents[0].linenum
 
-    # Use tokens to tag chunks of text with it's container type
-    for (tok_type, tok_val) in tokens:
-        if tok_type == TOKEN_TYPE_TEXT:
-            chunks.append((mode, tok_val))
-        elif tok_type == TOKEN_TYPE_OPEN_MARKER:
-            mode = mode | MARKERS[tok_val]
-        elif tok_type == TOKEN_TYPE_OPEN_MARKER:
-            mode = mode ^ MARKERS[tok_val]
-
-    # Convert those chunks to a tree
-    def tree_for_tag(tag, in_mode):
-        tree = []
-        for (mask, mode) in MODES:
-            if (mask & tag) and not (mask & in_mode):
-                tree.append(mode)
-        print(tree)
-        if len(tree) == 0:
-            return Text
-
-
-    if len(raw_contents) > 0:
-        current_line = raw_contents[0].linenum
-
-    # tree = []
-    # pos = []
-    # print('\n'.join(map(str, chunks)))
-    # for (tag, chunk) in chunks:
-    #     if pos == []:
-    #         tree.append(tree_for_tag(tag, NO_MODE)(chunk, line=current_line))
-    #         pos.append(tree[-1])
-    #     else:
-    #         raise NotImplementedError()
-
-    #     current_line += chunk.count('\n')
-
-
-    tree = []
-    mode_tree = []
     contents = []
     # Use tokens to tag chunks of text with it's container type
     for (tok_type, tok_val) in tokens:
         if tok_type == TOKEN_TYPE_TEXT:
-            if len(mode_tree) == 0:
-                tree.append(Text(tok_val, current_line))
-            else:
-                contents[-1].append(tok_val)
-
-            current_line += chunk.count('\n')
-
+            contents.append(tok_val)
         elif tok_type == TOKEN_TYPE_OPEN_MARKER:
-            mode_tree.append(_MODES[MARKERS[tok_val]])
-            contents.append([])
-
+            contents.append(MarkerToken(False, MARKERS[tok_val]))
         elif tok_type == TOKEN_TYPE_CLOSE_MARKER:
-            mode = _MODES[MARKERS[tok_val]]
-            matching_mode = mode_tree.pop()
-            assert mode == matching_mode
-            value = mode(contents.pop(), current_line)
-            current_line += chunk.count('\n')
+            contents.append(MarkerToken(True, MARKERS[tok_val]))
 
-            if len(mode_tree) == 0:  # Closed branch of tree
-                tree.append(value)
-            else:
-                print("{} <- {}".format(mode_tree[-1], mode))
-                contents[-1].append(value)
+    return [Text(contents, current_line)]
 
-            current_line += chunk.count('\n')
-
-    if len(tree) > 3:
-        print("L", len(tree))
-    print("F:", tree)
-    return tree
 
 def parse_headline(hl) -> Headline:
-    stars = hl['orig'].group('stars')
+    stars = hl["orig"].group("stars")
     depth = len(stars)
 
     # TODO: Parse line for priority, cookies and tags
-    line = hl['orig'].group('line')
+    line = hl["orig"].group("line")
     title = line.strip()
-    contents = parse_contents(hl['contents'])
+    contents = parse_contents(hl["contents"])
 
-    return Headline(start_line=hl['linenum'],
-                    depth=depth,
-                    orig=hl['orig'],
-                    title=title,
-                    contents=contents,
-                    children=[parse_headline(child) for child in hl['children']],
-                    keywords=hl['keywords'],
-                    properties=hl['properties'],
-                    structural=hl['structural'],
-                    title_start=None,
-                    priority=None,
-                    priority_start=None,
-                    tags_start=None,
-                    tags=None,
+    return Headline(
+        start_line=hl["linenum"],
+        depth=depth,
+        orig=hl["orig"],
+        title=title,
+        contents=contents,
+        children=[parse_headline(child) for child in hl["children"]],
+        keywords=hl["keywords"],
+        properties=hl["properties"],
+        structural=hl["structural"],
+        title_start=None,
+        priority=None,
+        priority_start=None,
+        tags_start=None,
+        tags=None,
     )
 
 
@@ -454,35 +455,41 @@ class OrgDom:
 
     # Writing
     def dump_kw(self, kw):
-        options = kw.match.group('options')
+        options = kw.match.group("options")
         if not options:
-            options = ''
+            options = ""
 
-        return (kw.linenum,
-                '{indentation}#+{key}{options}:{spacing}{value}'.format(
-                    indentation=kw.match.group('indentation'),
-                    key=kw.key,
-                    options=kw.options,
-                    spacing=kw.match.group('spacing'),
-                    value=kw.value,
-                ))
+        return (
+            kw.linenum,
+            "{indentation}#+{key}{options}:{spacing}{value}".format(
+                indentation=kw.match.group("indentation"),
+                key=kw.key,
+                options=kw.options,
+                spacing=kw.match.group("spacing"),
+                value=kw.value,
+            ),
+        )
 
     def dump_property(self, prop: Property):
-        plus = prop.match.group('plus')
-        if plus is None: plus = ''
+        plus = prop.match.group("plus")
+        if plus is None:
+            plus = ""
 
         if isinstance(prop.value, Timestamp):
             value = timestamp_to_string(prop.value)
         else:
             value = prop.value
 
-        return (prop.linenum, '{indentation}:{key}{plus}:{spacing}{value}'.format(
-            indentation=prop.match.group('indentation'),
-            key=prop.key,
-            plus=plus,
-            spacing=prop.match.group('spacing'),
-            value=value,
-        ))
+        return (
+            prop.linenum,
+            "{indentation}:{key}{plus}:{spacing}{value}".format(
+                indentation=prop.match.group("indentation"),
+                key=prop.key,
+                plus=plus,
+                spacing=prop.match.group("spacing"),
+                value=value,
+            ),
+        )
 
     def dump_contents(self, raw):
         if isinstance(raw, RawLine):
@@ -494,7 +501,9 @@ class OrgDom:
         return (structural[0], structural[1])
 
     def dump_headline(self, headline):
-        yield '*' * headline.depth + ' ' + headline.orig.group('spacing') + headline.title
+        yield "*" * headline.depth + " " + headline.orig.group(
+            "spacing"
+        ) + headline.title
 
         lines = []
         KW_T = 0
@@ -523,21 +532,31 @@ class OrgDom:
 
             if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
                 # No structural opening
-                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
-                logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
-            elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
+                structured_lines.append(" " * content.index(":") + ":PROPERTIES:\n")
+                logging.warning(
+                    "Added structural: ".format(
+                        line[1][0], structured_lines[-1].strip()
+                    )
+                )
+            elif (
+                ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T
+            ):
                 # No structural closing
                 last_line = lines[i - 1][1][1]
-                structured_lines.append(' ' * last_line.index(':') + ':END:\n')
-                logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
+                structured_lines.append(" " * last_line.index(":") + ":END:\n")
+                logging.warning(
+                    "Added structural:{}: {}".format(
+                        line[1][0], structured_lines[-1].strip()
+                    )
+                )
 
             elif ltype != CONTENT_T:
-                content = content + '\n'
+                content = content + "\n"
 
             last_type = ltype
             structured_lines.append(content)
 
-        yield ''.join(structured_lines)
+        yield "".join(structured_lines)
 
         for child in headline.children:
             yield from self.dump_headline(child)
@@ -555,8 +574,8 @@ class OrgDom:
         for headline in self.headlines:
             yield from self.dump_headline(headline)
 
-class OrgDomReader:
 
+class OrgDomReader:
     def __init__(self):
         self.headlines: List[Headline] = []
         self.keywords: List[Property] = []
@@ -569,18 +588,18 @@ class OrgDomReader:
     ## Construction
     def add_headline(self, linenum: int, match: re.Match) -> int:
         # Position reader on the proper headline
-        stars = match.group('stars')
+        stars = match.group("stars")
         depth = len(stars)
 
         headline = {
-            'linenum': linenum,
-            'orig': match,
-            'title': match.group('line'),
-            'contents': [],
-            'children': [],
-            'keywords': [],
-            'properties': [],
-            'structural': [],
+            "linenum": linenum,
+            "orig": match,
+            "title": match.group("line"),
+            "contents": [],
+            "children": [],
+            "keywords": [],
+            "properties": [],
+            "structural": [],
         }
 
         while (depth - 2) > len(self.headline_hierarchy):
@@ -592,41 +611,46 @@ class OrgDomReader:
         if depth == 1:
             self.headlines.append(headline)
         else:
-            self.headline_hierarchy[-1]['children'].append(headline)
+            self.headline_hierarchy[-1]["children"].append(headline)
         self.headline_hierarchy.append(headline)
 
-
     def add_keyword_line(self, linenum: int, match: re.Match) -> int:
-        options = match.group('options')
-        kw = Keyword(linenum, match, match.group('key'), match.group('value'), options if options is not None else '')
+        options = match.group("options")
+        kw = Keyword(
+            linenum,
+            match,
+            match.group("key"),
+            match.group("value"),
+            options if options is not None else "",
+        )
         if len(self.headline_hierarchy) == 0:
             self.keywords.append(kw)
         else:
-            self.headline_hierarchy[-1]['keywords'].append(kw)
+            self.headline_hierarchy[-1]["keywords"].append(kw)
 
     def add_raw_line(self, linenum: int, line: str) -> int:
         raw = RawLine(linenum, line)
         if len(self.headline_hierarchy) == 0:
             self.contents.append(raw)
         else:
-            self.headline_hierarchy[-1]['contents'].append(raw)
+            self.headline_hierarchy[-1]["contents"].append(raw)
 
     def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
-        self.current_drawer = self.headline_hierarchy[-1]['properties']
-        self.headline_hierarchy[-1]['structural'].append((linenum, line))
+        self.current_drawer = self.headline_hierarchy[-1]["properties"]
+        self.headline_hierarchy[-1]["structural"].append((linenum, line))
 
     def add_drawer_end_line(self, linenum: int, line: str, match: re.Match) -> int:
         self.current_drawer = None
-        self.headline_hierarchy[-1]['structural'].append((linenum, line))
+        self.headline_hierarchy[-1]["structural"].append((linenum, line))
 
     def add_node_properties_line(self, linenum: int, match: re.Match) -> int:
-        key = match.group('key')
-        value = match.group('value').strip()
+        key = match.group("key")
+        value = match.group("value").strip()
 
-        if (value.count('>--<') == 1) or (value.count(']--[') == 1):
+        if (value.count(">--<") == 1) or (value.count("]--[") == 1):
             # Time ranges with two different dates
             # @TODO properly consider "=> DURATION" section
-            chunks = value.split('=').split('--')
+            chunks = value.split("=").split("--")
             as_time_range = parse_org_time(chunks[0], chunks[1])
             if (as_time_range[0] is not None) and (as_time_range[1] is not None):
                 value = TimeRange(as_time_range[0], as_time_range[1])
@@ -636,7 +660,7 @@ class OrgDomReader:
         self.current_drawer.append(Property(linenum, match, key, value, None))
 
     def read(self, s, environment):
-        lines = s.split('\n')
+        lines = s.split("\n")
         reader = enumerate(lines)
 
         for linenum, line in reader:
@@ -653,7 +677,7 @@ class OrgDomReader:
             elif m := NODE_PROPERTIES_RE.match(line):
                 self.add_node_properties_line(linenum, m)
             else:
-                raise NotImplementedError('{}: ‘{}’'.format(linenum, line))
+                raise NotImplementedError("{}: ‘{}’".format(linenum, line))
 
 
 def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=False):
@@ -662,7 +686,9 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=False):
     dom = doc.finalize()
     if extra_cautious:  # Check that all options can be properly re-serialized
         if dumps(dom) != s:
-            raise NotImplementedError("Error re-serializing, file uses something not implemented")
+            raise NotImplementedError(
+                "Error re-serializing, file uses something not implemented"
+            )
     return dom
 
 
@@ -672,6 +698,6 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
 
 def dumps(doc):
     dump = list(doc.dump())
-    result = '\n'.join(dump)
+    result = "\n".join(dump)
     print(result)
     return result
diff --git a/org_dom/utils.py b/org_dom/utils.py
index ce77add..34747a0 100644
--- a/org_dom/utils.py
+++ b/org_dom/utils.py
@@ -1,4 +1,6 @@
-from .org_dom import Headline, Line, RawLine, Text, Bold, Code, Italic, Strike, Underlined, Verbatim
+from .org_dom import (Bold, Code, Headline, Italic, Line, RawLine, Strike,
+                      Text, Underlined, Verbatim)
+
 
 def get_hl_raw_contents(doc: Headline) -> str:
     lines = []
diff --git a/tests/02-markup.org b/tests/02-markup.org
index 41c2bb2..1de34da 100644
--- a/tests/02-markup.org
+++ b/tests/02-markup.org
@@ -22,6 +22,8 @@
 
   This is a nested *bold =verbatim /italic +strike _underlined ~code .~ ._ .+ ./ .= .*
 
+  This is a interleaved *bold =verbatim /italic +strike _underlined ~code .* .= ./ .+ ._ .~
+
   This is a _ non-underlined phrase because an incorrectly placed content _.
 
   This is a _ non-underlined phrase because an incorrectly placed content beginning_.
diff --git a/tests/test_dom.py b/tests/test_dom.py
index 7e855ba..9321502 100644
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -5,7 +5,7 @@ from datetime import datetime as DT
 
 from org_dom import dumps, load, loads
 from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE,
-                                  UNDERLINED, VERBATIM, WEB_LINK, Dom,)
+                                  UNDERLINED, VERBATIM, WEB_LINK, Dom, Tokens)
 
 DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -47,7 +47,7 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(dumps(doc), orig)
 
     def test_markup_file_02(self):
-        self.maxDiff = 1024
+        self.maxDiff = 10000
         with open(os.path.join(DIR, '02-markup.org')) as f:
             doc = load(f)
 
@@ -82,11 +82,33 @@ class TestSerde(unittest.TestCase):
                                   SPAN("  This is a nested ", BOLD(["bold ", VERBATIM(["verbatim ", ITALIC(["italic ", STRIKE(["strike ", UNDERLINED(["underlined ", CODE("code ."), " ."]), " ."]), " ."]), " ."]), " ."])),
                                   SPAN("\n"),
 
-                                  # SPAN(""),
-                                  # # TODO: THIS IS INTERLEAVED, not nested
-                                  # In ORG:   This is a interleaved *bold =verbatim /italic +strike _underlined ~code .* .= ./ .+ ._ .~
-                                  # SPAN("  This is a nested ", BOLD(["bold ", VERBATIM(["verbatim ", ITALIC(["italic ", STRIKE(["strike ", UNDERLINED(["underlined ", CODE("code ."), " ."]), " ."]), " ."]), " ."]), " ."])),
-                                  # SPAN(""),
+                                  SPAN("\n"),
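+                                  # This line's markup is INTERLEAVED (markers close in the order they were opened), not nested, so the expected span is a flat list of text and tokens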
+                                  SPAN(["  This is a interleaved ",
+                                        Tokens.BOLD_START,
+                                        "bold ",
+                                        Tokens.VERBATIM_START,
+                                        "verbatim ",
+                                        Tokens.ITALIC_START,
+                                        "italic ",
+                                        Tokens.STRIKE_START,
+                                        "strike ",
+                                        Tokens.UNDERLINED_START,
+                                        "underlined ",
+                                        Tokens.CODE_START,
+                                        "code .",
+                                        Tokens.BOLD_END,
+                                        " .",
+                                        Tokens.VERBATIM_END,
+                                        " .",
+                                        Tokens.ITALIC_END,
+                                        " .",
+                                        Tokens.STRIKE_END,
+                                        " .",
+                                        Tokens.UNDERLINED_END,
+                                        " .",
+                                        Tokens.CODE_END,
+                                        "\n"]),
 
                                   SPAN("\n"),
                                   SPAN("  This is a _ non-underlined phrase because an incorrectly placed content _.\n"),
diff --git a/tests/utils/dom_assertions.py b/tests/utils/dom_assertions.py
index b8aafa8..0a69372 100644
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@@ -2,7 +2,8 @@ import collections
 import unittest
 from datetime import datetime
 
-from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents
+from org_dom import (Bold, Code, Italic, Line, Strike, Text, Underlined,
+                     Verbatim, get_raw_contents)
 
 
 def timestamp_to_datetime(ts):
@@ -13,7 +14,7 @@ def get_raw(doc):
     if isinstance(doc, str):
         return doc
     elif isinstance(doc, list):
-        return ''.join([get_raw(e) for e in doc])
+        return "".join([get_raw(e) for e in doc])
     else:
         return doc.get_raw()
 
@@ -44,8 +45,7 @@ class Dom:
             test_case.assertEqual(len(doc.getTopHeadlines()), 0, "Top")
         else:
             doc_headlines = doc.getTopHeadlines()
-            test_case.assertEqual(len(doc_headlines), len(self.children),
-                                  "Top")
+            test_case.assertEqual(len(doc_headlines), len(self.children), "Top")
 
             for i, children in enumerate(self.children):
                 children.assert_matches(test_case, doc_headlines[i])
@@ -72,7 +72,8 @@ class HL:
                 test_case.assertEqual(doc_props[i].key, prop[0])
                 if isinstance(prop[1], datetime):
                     test_case.assertEqual(
-                        timestamp_to_datetime(doc_props[i].value), prop[1])
+                        timestamp_to_datetime(doc_props[i].value), prop[1]
+                    )
 
         test_case.assertEqual(get_raw_contents(doc), self.get_raw())
 
@@ -81,14 +82,13 @@ class HL:
             test_case.assertEqual(len(doc.children), 0)
         else:
             doc_headlines = doc.children
-            test_case.assertEqual(len(doc_headlines), len(self.children),
-                                  self.title)
+            test_case.assertEqual(len(doc_headlines), len(self.children), self.title)
 
             for i, children in enumerate(self.children):
                 children.assert_matches(test_case, doc_headlines[i])
 
     def get_raw(self):
-        return ''.join(map(get_raw, self.content))
+        return "".join(map(get_raw, self.content))
 
 
 class SPAN:
@@ -100,10 +100,16 @@ class SPAN:
         for section in self.contents:
             if isinstance(section, str):
                 chunks.append(section)
+            elif isinstance(section, list):
+                for subsection in section:
+                    if isinstance(subsection, str):
+                        chunks.append(subsection)
+                    else:
+                        chunks.append(subsection.get_raw())
             else:
                 chunks.append(section.get_raw())
 
-        return ''.join(chunks)
+        return "".join(chunks)
 
     def assert_matches(self, test_case, doc):
         if not isinstance(doc, Line):
@@ -121,7 +127,7 @@ class BOLD:
         self.text = text
 
     def get_raw(self):
-        return '*{}*'.format(get_raw(self.text))
+        return "*{}*".format(get_raw(self.text))
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Bold))
@@ -133,29 +139,31 @@ class CODE:
         self.text = text
 
     def get_raw(self):
-        return '~{}~'.format(get_raw(self.text))
+        return "~{}~".format(get_raw(self.text))
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Code))
         test_case.assertEqual(self.text, other.contents)
 
+
 class ITALIC:
     def __init__(self, text):
         self.text = text
 
     def get_raw(self):
-        return '/{}/'.format(get_raw(self.text))
+        return "/{}/".format(get_raw(self.text))
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Italic))
         test_case.assertEqual(self.text, other.contents)
 
+
 class STRIKE:
     def __init__(self, text):
         self.text = text
 
     def get_raw(self):
-        return '+{}+'.format(get_raw(self.text))
+        return "+{}+".format(get_raw(self.text))
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Strike))
@@ -167,32 +175,54 @@ class UNDERLINED:
         self.text = text
 
     def get_raw(self):
-        return '_{}_'.format(get_raw(self.text))
+        return "_{}_".format(get_raw(self.text))
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Underlined))
         test_case.assertEqual(self.text, other.contents)
 
+
 class VERBATIM:
     def __init__(self, text):
         self.text = text
 
     def get_raw(self):
-        return '={}='.format(get_raw(self.text))
+        return "={}=".format(get_raw(self.text))
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Verbatim))
         test_case.assertEqual(self.text, other.contents)
 
+
 class WEB_LINK:
     def __init__(self, text, link):
         self.text = text
         self.link = link
 
     def get_raw(self):
-        return '[[{}][{}]]'.format(self.link, self.text)
+        return "[[{}][{}]]".format(self.link, self.text)
 
     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, WebLink))
         test_case.assertEqual(self.text, other.contents)
         test_case.assertEqual(self.link, other.link)
+
+
+class Tokens:
+    BOLD_END = "*"
+    BOLD_START = "*"
+
+    VERBATIM_START = "="
+    VERBATIM_END = "="
+
+    ITALIC_START = "/"
+    ITALIC_END = "/"
+
+    STRIKE_START = "+"
+    STRIKE_END = "+"
+
+    UNDERLINED_START = "_"
+    UNDERLINED_END = "_"
+
+    CODE_START = "~"
+    CODE_END = "~"