From 0dab7e4703f8a238897ac520caa575250a87fadd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Sat, 27 Jun 2020 19:20:34 +0200
Subject: [PATCH] Add base support for markup tests.

---
 org_dom/__init__.py           |  1 +
 org_dom/org_dom.py            | 60 ++++++++++++++++++++-----
 org_dom/utils.py              | 22 ++++++++++
 tests/02-markup.org           | 21 +++++++++
 tests/test_dom.py             | 44 +++++++++++++++++--
 tests/utils/dom_assertions.py | 83 ++++++++++++++++++++++++++++++++---
 6 files changed, 209 insertions(+), 22 deletions(-)
 create mode 100644 org_dom/utils.py
 create mode 100644 tests/02-markup.org
diff --git a/org_dom/__init__.py b/org_dom/__init__.py
index 522a603..28c7ee6 100644
--- a/org_dom/__init__.py
+++ b/org_dom/__init__.py
@@ -1 +1,2 @@
 from .org_dom import *
+from .utils import *
diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py
index a4af677..68f8d80 100644
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -55,12 +55,14 @@ INACTIVE_TIME_STAMP_RE = re.compile(r'\[{}\]'.format(BASE_TIME_STAMP_RE))
 #                       r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
 
 Headline = collections.namedtuple('Headline', ('start_line', 'depth',
-                                               'keyword_start', 'keyword',
+                                               'orig',
+                                               'properties', 'keywords',
                                                'priority_start', 'priority',
                                                'title_start', 'title',
                                                'tags_start', 'tags',
-                                               'content',
+                                               'contents',
                                                'children',
+                                               'structural',
 ))
 
 RawLine = collections.namedtuple('RawLine', ('linenum', 'line'))
@@ -107,9 +109,43 @@ def timestamp_to_string(ts):
     else:
         return '[{}]'.format(base)
 
+def parse_headline(hl) -> Headline:
+    """Build a `Headline` from a raw headline dict.
+
+    `hl` is read through the keys 'linenum', 'orig', 'contents',
+    'children', 'keywords', 'properties' and 'structural'.  'orig' is the
+    regex match of the headline line and must provide the 'stars' and
+    'line' groups.  Children are converted recursively.  Priority, tags
+    and the `*_start` offsets are not parsed yet and are always None.
+    """
+    # HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
+    match = hl['orig']
+
+    # TODO: Parse line for priority, cookies and tags
+    title = match.group('line').strip()
+    children = [parse_headline(child) for child in hl['children']]
+
+    return Headline(
+        start_line=hl['linenum'],
+        depth=len(match.group('stars')),  # one level per leading star
+        orig=match,
+        properties=hl['properties'],
+        keywords=hl['keywords'],
+        priority_start=None,
+        priority=None,
+        title_start=None,
+        title=title,
+        tags_start=None,
+        tags=None,
+        contents=hl['contents'],
+        children=children,
+        structural=hl['structural'],
+    )
+
+
 class OrgDom:
     def __init__(self, headlines, keywords, contents):
-        self.headlines: List[Headline] = headlines
+        self.headlines: List[Headline] = list(map(parse_headline, headlines))
         self.keywords: List[Property] = keywords
         self.contents: List[RawLine] = contents
 
@@ -162,23 +198,23 @@ class OrgDom:
         return (structural[0], structural[1])
 
     def dump_headline(self, headline):
-        yield headline['orig'].group('stars') + ' ' + headline['orig'].group('spacing') + headline['orig'].group('line')
+        yield '*' * headline.depth + ' ' + headline.orig.group('spacing') + headline.title
 
         lines = []
         KW_T = 0
         CONTENT_T = 1
         PROPERTIES_T = 2
         STRUCTURAL_T = 3
-        for keyword in headline['keywords']:
+        for keyword in headline.keywords:
             lines.append((KW_T, self.dump_kw(keyword)))
 
-        for content in headline['contents']:
+        for content in headline.contents:
             lines.append((CONTENT_T, self.dump_contents(content)))
 
-        for prop in headline['properties']:
+        for prop in headline.properties:
             lines.append((PROPERTIES_T, self.dump_property(prop)))
 
-        for struct in headline['structural']:
+        for struct in headline.structural:
             lines.append((STRUCTURAL_T, self.dump_structural(struct)))
 
         lines = sorted(lines, key=lambda x: x[1][0])
@@ -204,7 +240,7 @@ class OrgDom:
 
         yield from structured_lines
 
-        for child in headline['children']:
+        for child in headline.children:
             yield from self.dump_headline(child)
 
     def dump(self):
@@ -235,7 +271,7 @@ class OrgDomReader:
     def add_headline(self, linenum: int, match: re.Match) -> int:
         # Position reader on the proper headline
         stars = match.group('stars')
-        depth = len(stars) - 1
+        depth = len(stars)
 
         headline = {
             'linenum': linenum,
@@ -248,13 +284,13 @@ class OrgDomReader:
             'structural': [],
         }
 
-        while (depth - 1) > len(self.headline_hierarchy):
+        while (depth - 2) > len(self.headline_hierarchy):
             # Introduce structural headlines
             self.headline_hierarchy.append(None)
         while depth < len(self.headline_hierarchy):
             self.headline_hierarchy.pop()
 
-        if depth == 0:
+        if depth == 1:
             self.headlines.append(headline)
         else:
             self.headline_hierarchy[-1]['children'].append(headline)
diff --git a/org_dom/utils.py b/org_dom/utils.py
new file mode 100644
index 0000000..8be37ad
--- /dev/null
+++ b/org_dom/utils.py
@@ -0,0 +1,22 @@
+from .org_dom import Headline, RawLine
+
+
+def get_hl_raw_contents(doc: Headline) -> str:
+    """Return the headline's own contents as text, one item per line."""
+    rendered = (get_raw_contents(item) for item in doc.contents)
+    return '\n'.join(rendered)
+
+
+def get_rawline_contents(doc: RawLine) -> str:
+    """Return the text stored in a raw line."""
+    return doc.line
+
+
+def get_raw_contents(doc) -> str:
+    """Return the raw text of a `Headline` or a `RawLine`."""
+    if isinstance(doc, RawLine):
+        return get_rawline_contents(doc)
+    if isinstance(doc, Headline):
+        return get_hl_raw_contents(doc)
+    # Fail loudly on node types this helper cannot render yet.
+    raise NotImplementedError('Unhandled type: ' + str(doc))
diff --git a/tests/02-markup.org b/tests/02-markup.org
new file mode 100644
index 0000000..18d58f5
--- /dev/null
+++ b/tests/02-markup.org
@@ -0,0 +1,21 @@
+#+TITLE: 02-Markup
+#+DESCRIPTION: Simple org file to test markup
+#+TODO: TODO(t) PAUSED(p) |  DONE(d)
+
+
+* First level
+  :PROPERTIES:
+  :ID:       02-markup-first-level-id
+  :CREATED:  [2020-01-01 Wed 01:01]
+  :END:
+  This is a *bold phrase*.
+
+  This is a =verbatim phrase=.
+
+  This is a /italic phrase/.
+
+  This is a +strike-through phrase+.
+
+  This is a _underlined phrase_.
+
+  This is a ~code phrase~.
diff --git a/tests/test_dom.py b/tests/test_dom.py
index bef59ec..6659a78 100644
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -4,7 +4,8 @@ import unittest
 from datetime import datetime as DT
 
 from org_dom import dumps, load, loads
-from utils.dom_assertions import HL, Dom
+from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE,
+                                  UNDERLINED, VERBATIM, Dom)
 
 DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -23,15 +24,15 @@ class TestSerde(unittest.TestCase):
                          ('ID', '01-simple-first-level-id'),
                          ('CREATED', DT(2020, 1, 1, 1, 1)),
                      ],
-                     content='First level content',
+                     content='  First level content\n',
                      children=[
                          HL('Second level',
                             props=[('ID', '01-simple-second-level-id')],
-                            content='Second level content',
+                            content='\n   Second level content\n',
                             children=[
                                 HL('Third level',
                                    props=[('ID', '01-simple-third-level-id')],
-                                   content='Third level content')
+                                   content='\n    Third level content\n')
                             ])
                      ])))
 
@@ -44,3 +45,38 @@ class TestSerde(unittest.TestCase):
             doc = loads(orig)
 
         self.assertEqual(dumps(doc), orig)
+
+    def test_markup_file_02(self):
+        """Load 02-markup.org and compare it with the expected DOM.
+
+        The expected contents alternate one line holding a markup phrase
+        with one empty line, ending on an empty line.
+        """
+        with open(os.path.join(DIR, '02-markup.org')) as f:
+            doc = load(f)
+
+        # Markup nodes in the order they appear in the file.
+        phrases = (
+            BOLD("bold phrase"),
+            VERBATIM("verbatim phrase"),
+            ITALIC("italic phrase"),
+            STRIKE("strike-through phrase"),
+            UNDERLINED("underlined phrase"),
+            CODE("code phrase"),
+        )
+        content = []
+        for phrase in phrases:
+            content.append(SPAN("  This is a ", phrase, "."))
+            content.append(SPAN(""))
+
+        ex = Dom(props=[('TITLE', '02-Markup'),
+                        ('DESCRIPTION', 'Simple org file to test markup'),
+                        ('TODO', 'TODO(t) PAUSED(p) |  DONE(d)')],
+                 children=(HL('First level',
+                              props=[
+                                  ('ID', '02-markup-first-level-id'),
+                                  ('CREATED', DT(2020, 1, 1, 1, 1)),
+                              ],
+                              content=content)))
+
+        ex.assert_matches(self, doc)
diff --git a/tests/utils/dom_assertions.py b/tests/utils/dom_assertions.py
index 3562687..c69ad09 100644
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@@ -2,6 +2,8 @@ import collections
 import unittest
 from datetime import datetime
 
+from org_dom import get_raw_contents
+
 
 def timestamp_to_datetime(ts):
     return datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)
@@ -48,13 +50,13 @@ class HL:
         self.children = children
 
     def assert_matches(self, test_case: unittest.TestCase, doc):
-        test_case.assertEqual(self.title, doc['title'])
+        test_case.assertEqual(self.title, doc.title)
 
         # Check properties
         if self.props is None:
-            test_case.assertEqual(len(doc['properties']), 0)
+            test_case.assertEqual(len(doc.properties), 0)
         else:
-            doc_props = doc['properties']
+            doc_props = doc.properties
             test_case.assertEqual(len(doc_props), len(self.props))
 
             for i, prop in enumerate(self.props):
@@ -63,15 +65,84 @@ class HL:
                     test_case.assertEqual(
                         timestamp_to_datetime(doc_props[i].value), prop[1])
 
-        # @TODO: Check properties
+        if isinstance(self.content, str):
+            test_case.assertEqual(get_raw_contents(doc), self.content)
+        else:
+            test_case.assertEqual(len(doc.contents), len(self.content))
+            for i, content in enumerate(self.content):
+                test_case.assertEqual(get_raw_contents(doc.contents[i]),
+                                      content.to_raw())
 
         # Check children
         if self.children is None:
-            test_case.assertEqual(len(doc['children']), 0)
+            test_case.assertEqual(len(doc.children), 0)
         else:
-            doc_headlines = doc['children']
+            doc_headlines = doc.children
             test_case.assertEqual(len(doc_headlines), len(self.children),
                                   self.title)
 
             for i, children in enumerate(self.children):
                 children.assert_matches(test_case, doc_headlines[i])
+
+
+class SPAN:
+    """Expected content line: plain strings and markup nodes, in order."""
+
+    def __init__(self, *kwargs):
+        # NOTE: despite its name, `kwargs` holds the positional parts.
+        self.contents = kwargs
+
+    def to_raw(self):
+        """Join the parts, rendering non-string ones via `to_raw()`."""
+        return ''.join(
+            part if isinstance(part, str) else part.to_raw()
+            for part in self.contents
+        )
+
+
+class _Markup:
+    """Expected inline markup: `text` wrapped in the subclass' `marker`.
+
+    The six markup helpers below differ only in the marker character, so
+    the constructor and the rendering are written once here and each
+    subclass just sets `marker`.
+    """
+
+    marker = ''
+
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        """Return `text` with the marker on both sides, e.g. ``*text*``."""
+        return '{0}{1}{0}'.format(self.marker, self.text)
+
+
+class BOLD(_Markup):
+    """Renders as ``*text*``."""
+    marker = '*'
+
+
+class CODE(_Markup):
+    """Renders as ``~text~``."""
+    marker = '~'
+
+
+class ITALIC(_Markup):
+    """Renders as ``/text/``."""
+    marker = '/'
+
+
+class STRIKE(_Markup):
+    """Renders as ``+text+``."""
+    marker = '+'
+
+
+class UNDERLINED(_Markup):
+    """Renders as ``_text_``."""
+    marker = '_'
+
+
+class VERBATIM(_Markup):
+    """Renders as ``=text=``."""
+    marker = '='