From 5b886e5e242928a799d56a1e2cc1ae48a96e6f61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Sat, 10 Oct 2020 00:39:32 +0200
Subject: [PATCH] Pass markup tests.

---
 org_dom/org_dom.py            | 199 +++++++++++++++++++++++++++++++---
 org_dom/utils.py              |  13 ++-
 tests/test_dom.py             |   1 -
 tests/utils/dom_assertions.py |  39 ++++++-
 4 files changed, 230 insertions(+), 22 deletions(-)

diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py
index 68f8d80..8ff5746 100644
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -109,28 +109,187 @@ def timestamp_to_string(ts):
     else:
         return '[{}]'.format(base)
 
+
+class Line:
+    """One parsed content line: its source line number and inline chunks."""
+    def __init__(self, linenum, contents):
+        self.linenum = linenum
+        self.contents = contents
+
+    def get_raw(self):
+        """Return the line's raw text followed by a newline."""
+        pieces = (
+            part if isinstance(part, str) else part.get_raw()
+            for part in self.contents
+        )
+        return ''.join(pieces) + '\n'
+
+class Text:
+    """Plain run of text inside a line; its raw form is the text itself."""
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        return ''.join(self.contents)
+
+
+class _Markup:
+    """Shared behaviour of the inline markup spans defined below.
+
+    ``contents`` is a string or an iterable of strings (it is handed to
+    ``''.join``); ``get_raw`` wraps the joined text in the subclass's
+    ``delimiter`` on both sides.
+    """
+    # Overridden by each concrete span class.
+    delimiter = ''
+
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"{self.delimiter}{raw}{self.delimiter}"
+
+
+class Bold(_Markup):
+    """Bold span, written as ``*text*``."""
+    delimiter = '*'
+
+
+class Code(_Markup):
+    """Code span, written as ``~text~``."""
+    delimiter = '~'
+
+
+class Italic(_Markup):
+    """Italic span, written as ``/text/``."""
+    delimiter = '/'
+
+
+class Strike(_Markup):
+    """Strike-through span, written as ``+text+``."""
+    delimiter = '+'
+
+
+class Underlined(_Markup):
+    """Underlined span, written as ``_text_``."""
+    delimiter = '_'
+
+
+class Verbatim(_Markup):
+    """Verbatim span, written as ``=text=``."""
+    delimiter = '='
+
+
+def parse_contents(raw_contents:List[RawLine]):
+    """Parse a headline's raw content lines into ``Line`` objects.
+
+    The lines are joined with newlines and scanned one character at a time.
+    Outside markup a backslash is dropped and makes the next character
+    literal; ``* ~ / + _ =`` open a span that the same character closes.
+    Every unescaped newline outside markup emits one ``Line``, numbered after
+    the raw line it ends on, so non-contiguous input keeps its numbering.
+
+    Returns a list of ``Line``; an empty input yields an empty list.
+    Raises NotImplementedError when a markup span crosses a newline, and
+    AssertionError when text is left after the last newline (the last raw
+    line is never emitted, so it has to be empty).
+    """
+    NO_MODE = 0
+    BOLD_MODE = 1
+    CODE_MODE = 2
+    ITALIC_MODE = 3
+    STRIKE_MODE = 4
+    UNDERLINED_MODE = 5
+    VERBATIM_MODE = 6
+
+    MODE_CLASS = {
+        NO_MODE: Line,
+        BOLD_MODE: Bold,
+        CODE_MODE: Code,
+        ITALIC_MODE: Italic,
+        STRIKE_MODE: Strike,
+        UNDERLINED_MODE: Underlined,
+        VERBATIM_MODE: Verbatim,
+    }
+
+    # Each span is opened and closed by the same delimiter character.
+    DELIMITER_MODE = {
+        '*': BOLD_MODE,
+        '~': CODE_MODE,
+        '/': ITALIC_MODE,
+        '+': STRIKE_MODE,
+        '_': UNDERLINED_MODE,
+        '=': VERBATIM_MODE,
+    }
+
+    if not raw_contents:
+        # A headline without body text has nothing to parse.
+        return []
+
+    mode = NO_MODE
+    escaped = False
+
+    chunk = []
+    inline = []
+    chunks = []
+
+    # Index of the raw line being scanned; it advances on every newline so
+    # that each Line reports its own source line number.
+    line_idx = 0
+    contents = '\n'.join(line.line for line in raw_contents)
+
+    for c in contents:
+        if mode == NO_MODE:
+            if escaped:
+                chunk.append(c)
+                escaped = False
+            elif c == '\\':
+                escaped = True
+            elif c in DELIMITER_MODE:
+                # Flush the plain text seen so far and open the span.
+                mode = DELIMITER_MODE[c]
+                inline.append(Text([''.join(chunk)]))
+                chunk = []
+            elif c == '\n':
+                linenum = raw_contents[line_idx].linenum
+                chunks.append(Line(linenum, inline + [Text(chunk)]))
+                chunk = []
+                inline = []
+            else:
+                chunk.append(c)
+        elif DELIMITER_MODE.get(c) == mode:
+            # Closing delimiter: wrap the collected text in the span class.
+            inline.append(MODE_CLASS[mode](''.join(chunk)))
+            chunk = []
+            mode = NO_MODE
+        elif c == '\n':
+            raise NotImplementedError("[{} | {}]".format(c, chunk))
+        else:
+            chunk.append(c)
+
+        if c == '\n':
+            line_idx += 1
+
+    assert(len(chunk) == 0)
+    assert(len(inline) == 0)
+
+    return chunks
+
 def parse_headline(hl) -> Headline:
-    # 'linenum': linenum,
-    # 'orig': match,
-    # 'title': match.group('line'),
-    # 'contents': [],
-    # 'children': [],
-    # 'keywords': [],
-    # 'properties': [],
-    # 'structural': [],
-    # HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
     stars = hl['orig'].group('stars')
     depth = len(stars)
 
     # TODO: Parse line for priority, cookies and tags
     line = hl['orig'].group('line')
     title = line.strip()
+    contents = parse_contents(hl['contents'])
 
     return Headline(start_line=hl['linenum'],
                     depth=depth,
                     orig=hl['orig'],
                     title=title,
-                    contents=hl['contents'],
+                    contents=contents,
                     children=[parse_headline(child) for child in hl['children']],
                     keywords=hl['keywords'],
                     properties=hl['properties'],
@@ -191,8 +350,11 @@ class OrgDom:
             value=value,
         ))
 
-    def dump_contents(self, raw: RawLine):
-        return (raw.linenum, raw.line)
+    def dump_contents(self, raw):
+        """Return a ``(linenum, text)`` pair for a RawLine or a parsed chunk."""
+        if not isinstance(raw, RawLine):
+            return (raw.linenum, raw.get_raw())
+        return (raw.linenum, raw.line)
 
     def dump_structural(self, structural: Tuple):
         return (structural[0], structural[1])
@@ -227,18 +389,21 @@ class OrgDom:
 
             if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
                 # No structural opening
-                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:')
+                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
                 logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
             elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
                 # No structural closing
                 last_line = lines[i - 1][1][1]
-                structured_lines.append(' ' * last_line.index(':') + ':END:')
+                structured_lines.append(' ' * last_line.index(':') + ':END:\n')
                 logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
 
+            elif ltype != CONTENT_T:
+                content = content + '\n'
+
             last_type = ltype
             structured_lines.append(content)
 
-        yield from structured_lines
+        yield ''.join(structured_lines)
 
         for child in headline.children:
             yield from self.dump_headline(child)
@@ -372,5 +537,7 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
 
 
 def dumps(doc):
-    result = '\n'.join(doc.dump())
+    """Serialise ``doc`` to text by joining the chunks yielded by ``doc.dump()``."""
+    dump = list(doc.dump())
+    result = '\n'.join(dump)
     return result
diff --git a/org_dom/utils.py b/org_dom/utils.py
index 8be37ad..e7c3e8c 100644
--- a/org_dom/utils.py
+++ b/org_dom/utils.py
@@ -1,4 +1,4 @@
-from .org_dom import Headline, RawLine
+from .org_dom import Headline, Line, RawLine
 
 
 def get_hl_raw_contents(doc: Headline) -> str:
@@ -7,16 +7,25 @@ def get_hl_raw_contents(doc: Headline) -> str:
     for content in doc.contents:
         lines.append(get_raw_contents(content))
 
-    return '\n'.join(lines)
+    raw = ''.join(lines)
+    return raw
 
 
 def get_rawline_contents(doc: RawLine) -> str:
     return doc.line
 
 
+def get_span_contents(doc: Line) -> str:
+    return doc.get_raw()  # Line.get_raw() already ends the text with a newline
+
+
 def get_raw_contents(doc) -> str:
     if isinstance(doc, Headline):
         return get_hl_raw_contents(doc)
     if isinstance(doc, RawLine):
         return get_rawline_contents(doc)
+    if isinstance(doc, Line):  # parsed line: re-serialise its inline chunks
+        return get_span_contents(doc)
+    if isinstance(doc, list):  # concatenate the raw text of every element
+        return ''.join([get_raw_contents(chunk) for chunk in doc])
     raise NotImplementedError('Unhandled type: ' + str(doc))
diff --git a/tests/test_dom.py b/tests/test_dom.py
index 6659a78..5b1f8cb 100644
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -76,7 +76,6 @@ class TestSerde(unittest.TestCase):
                                   SPAN(""),
                                   SPAN("  This is a ", CODE("code phrase"),
                                        "."),
-                                  SPAN(""),
                               ])))
 
         ex.assert_matches(self, doc)
diff --git a/tests/utils/dom_assertions.py b/tests/utils/dom_assertions.py
index c69ad09..9bd77e7 100644
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@@ -2,7 +2,7 @@ import collections
 import unittest
 from datetime import datetime
 
-from org_dom import get_raw_contents
+from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents
 
 
 def timestamp_to_datetime(ts):
@@ -68,10 +68,12 @@ class HL:
         if isinstance(self.content, str):
             test_case.assertEqual(get_raw_contents(doc), self.content)
         else:
+            if len(doc.contents) != len(self.content):
+                print("Contents:", doc.contents)
+                print("Expected:", self.content)
             test_case.assertEqual(len(doc.contents), len(self.content))
             for i, content in enumerate(self.content):
-                test_case.assertEqual(get_raw_contents(doc.contents[i]),
-                                      content.to_raw())
+                content.assert_matches(test_case, doc.contents[i])
 
         # Check children
         if self.children is None:
@@ -99,6 +101,16 @@ class SPAN:
 
         return ''.join(chunks)
 
+    def assert_matches(self, test_case, doc):
+        """Assert ``doc`` is a Line whose leading chunks match this span's sections."""
+        test_case.assertIsInstance(doc, Line)
+        for i, section in enumerate(self.contents):
+            if isinstance(section, str):
+                test_case.assertIsInstance(doc.contents[i], Text)
+                test_case.assertEqual(section, doc.contents[i].get_raw())
+            else:
+                section.assertEqual(test_case, doc.contents[i])
+
 
 class BOLD:
     def __init__(self, text):
@@ -107,6 +119,10 @@ class BOLD:
     def to_raw(self):
         return '*{}*'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertIsInstance(other, Bold)  # reports the actual type on failure
+        test_case.assertEqual(self.text, other.contents)
+
 
 class CODE:
     def __init__(self, text):
@@ -115,6 +131,9 @@ class CODE:
     def to_raw(self):
         return '~{}~'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertIsInstance(other, Code)  # reports the actual type on failure
+        test_case.assertEqual(self.text, other.contents)
 
 class ITALIC:
     def __init__(self, text):
@@ -123,6 +142,9 @@ class ITALIC:
     def to_raw(self):
         return '/{}/'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertIsInstance(other, Italic)  # reports the actual type on failure
+        test_case.assertEqual(self.text, other.contents)
 
 class STRIKE:
     def __init__(self, text):
@@ -131,6 +153,10 @@ class STRIKE:
     def to_raw(self):
         return '+{}+'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertIsInstance(other, Strike)  # reports the actual type on failure
+        test_case.assertEqual(self.text, other.contents)
+
 
 class UNDERLINED:
     def __init__(self, text):
@@ -139,6 +165,9 @@ class UNDERLINED:
     def to_raw(self):
         return '_{}_'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertIsInstance(other, Underlined)  # reports the actual type on failure
+        test_case.assertEqual(self.text, other.contents)
 
 class VERBATIM:
     def __init__(self, text):
@@ -146,3 +175,7 @@ class VERBATIM:
 
     def to_raw(self):
         return '={}='.format(self.text)
+
+    def assertEqual(self, test_case, other):
+        test_case.assertIsInstance(other, Verbatim)  # reports the actual type on failure
+        test_case.assertEqual(self.text, other.contents)