Add base support for markup tests.

2020-06-27 19:20:34 +02:00 · 2020-06-27 19:20:34 +02:00 · 0dab7e4703
commit 0dab7e4703
parent d23ee1adba
6 changed files with 209 additions and 22 deletions
--- a/org_dom/__init__.py
+++ b/org_dom/__init__.py
@@ -1 +1,2 @@
 from .org_dom import *
+from .utils import *
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -55,12 +55,14 @@ INACTIVE_TIME_STAMP_RE = re.compile(r'\[{}\]'.format(BASE_TIME_STAMP_RE))
 #                       r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')

 Headline = collections.namedtuple('Headline', ('start_line', 'depth',
-                                               'keyword_start', 'keyword',
+                                               'orig',
+                                               'properties', 'keywords',
                                               'priority_start', 'priority',
                                               'title_start', 'title',
                                               'tags_start', 'tags',
-                                               'content',
+                                               'contents',
                                               'children',
+                                               'structural',
 ))

 RawLine = collections.namedtuple('RawLine', ('linenum', 'line'))
@@ -107,9 +109,43 @@ def timestamp_to_string(ts):
    else:
        return '[{}]'.format(base)

+def parse_headline(hl) -> Headline:
+    # 'linenum': linenum,
+    # 'orig': match,
+    # 'title': match.group('line'),
+    # 'contents': [],
+    # 'children': [],
+    # 'keywords': [],
+    # 'properties': [],
+    # 'structural': [],
+    # HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
+    stars = hl['orig'].group('stars')
+    depth = len(stars)
+
+    # TODO: Parse line for priority, cookies and tags
+    line = hl['orig'].group('line')
+    title = line.strip()
+
+    return Headline(start_line=hl['linenum'],
+                    depth=depth,
+                    orig=hl['orig'],
+                    title=title,
+                    contents=hl['contents'],
+                    children=[parse_headline(child) for child in hl['children']],
+                    keywords=hl['keywords'],
+                    properties=hl['properties'],
+                    structural=hl['structural'],
+                    title_start=None,
+                    priority=None,
+                    priority_start=None,
+                    tags_start=None,
+                    tags=None,
+    )
+
+
 class OrgDom:
    def __init__(self, headlines, keywords, contents):
-        self.headlines: List[Headline] = headlines
+        self.headlines: List[Headline] = list(map(parse_headline, headlines))
        self.keywords: List[Property] = keywords
        self.contents: List[RawLine] = contents

@@ -162,23 +198,23 @@ class OrgDom:
        return (structural[0], structural[1])

    def dump_headline(self, headline):
-        yield headline['orig'].group('stars') + ' ' + headline['orig'].group('spacing') + headline['orig'].group('line')
+        yield '*' * headline.depth + ' ' + headline.orig.group('spacing') + headline.title

        lines = []
        KW_T = 0
        CONTENT_T = 1
        PROPERTIES_T = 2
        STRUCTURAL_T = 3
-        for keyword in headline['keywords']:
+        for keyword in headline.keywords:
            lines.append((KW_T, self.dump_kw(keyword)))

-        for content in headline['contents']:
+        for content in headline.contents:
            lines.append((CONTENT_T, self.dump_contents(content)))

-        for prop in headline['properties']:
+        for prop in headline.properties:
            lines.append((PROPERTIES_T, self.dump_property(prop)))

-        for struct in headline['structural']:
+        for struct in headline.structural:
            lines.append((STRUCTURAL_T, self.dump_structural(struct)))

        lines = sorted(lines, key=lambda x: x[1][0])
@@ -204,7 +240,7 @@ class OrgDom:

        yield from structured_lines

-        for child in headline['children']:
+        for child in headline.children:
            yield from self.dump_headline(child)

    def dump(self):
@@ -235,7 +271,7 @@ class OrgDomReader:
    def add_headline(self, linenum: int, match: re.Match) -> int:
        # Position reader on the proper headline
        stars = match.group('stars')
-        depth = len(stars) - 1
+        depth = len(stars)

        headline = {
            'linenum': linenum,
@@ -248,13 +284,13 @@ class OrgDomReader:
            'structural': [],
        }

-        while (depth - 1) > len(self.headline_hierarchy):
+        while (depth - 2) > len(self.headline_hierarchy):
            # Introduce structural headlines
            self.headline_hierarchy.append(None)
        while depth < len(self.headline_hierarchy):
            self.headline_hierarchy.pop()

-        if depth == 0:
+        if depth == 1:
            self.headlines.append(headline)
        else:
            self.headline_hierarchy[-1]['children'].append(headline)
--- a/org_dom/utils.py
+++ b/org_dom/utils.py
@@ -0,0 +1,22 @@
+from .org_dom import Headline, RawLine
+
+
+def get_hl_raw_contents(doc: Headline) -> str:
+    lines = []
+
+    for content in doc.contents:
+        lines.append(get_raw_contents(content))
+
+    return '\n'.join(lines)
+
+
+def get_rawline_contents(doc: RawLine) -> str:
+    return doc.line
+
+
+def get_raw_contents(doc) -> str:
+    if isinstance(doc, Headline):
+        return get_hl_raw_contents(doc)
+    if isinstance(doc, RawLine):
+        return get_rawline_contents(doc)
+    raise NotImplementedError('Unhandled type: ' + str(doc))
--- a/tests/02-markup.org
+++ b/tests/02-markup.org
@@ -0,0 +1,21 @@
+#+TITLE: 02-Markup
+#+DESCRIPTION: Simple org file to test markup
+#+TODO: TODO(t) PAUSED(p) |  DONE(d)
+
+
+* First level
+  :PROPERTIES:
+  :ID:       02-markup-first-level-id
+  :CREATED:  [2020-01-01 Wed 01:01]
+  :END:
+  This is a *bold phrase*.
+
+  This is a =verbatim phrase=.
+
+  This is a /italic phrase/.
+
+  This is a +strike-through phrase+.
+
+  This is a _underlined phrase_.
+
+  This is a ~code phrase~.
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -4,7 +4,8 @@ import unittest
 from datetime import datetime as DT

 from org_dom import dumps, load, loads
-from utils.dom_assertions import HL, Dom
+from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE,
+                                  UNDERLINED, VERBATIM, Dom)

 DIR = os.path.dirname(os.path.abspath(__file__))

@@ -23,15 +24,15 @@ class TestSerde(unittest.TestCase):
                         ('ID', '01-simple-first-level-id'),
                         ('CREATED', DT(2020, 1, 1, 1, 1)),
                     ],
-                     content='First level content',
+                     content='  First level content\n',
                     children=[
                         HL('Second level',
                            props=[('ID', '01-simple-second-level-id')],
-                            content='Second level content',
+                            content='\n   Second level content\n',
                            children=[
                                HL('Third level',
                                   props=[('ID', '01-simple-third-level-id')],
-                                   content='Third level content')
+                                   content='\n    Third level content\n')
                            ])
                     ])))

@@ -44,3 +45,38 @@ class TestSerde(unittest.TestCase):
            doc = loads(orig)

        self.assertEqual(dumps(doc), orig)
+
+    def test_markup_file_02(self):
+        with open(os.path.join(DIR, '02-markup.org')) as f:
+            doc = load(f)
+
+        ex = Dom(props=[('TITLE', '02-Markup'),
+                        ('DESCRIPTION', 'Simple org file to test markup'),
+                        ('TODO', 'TODO(t) PAUSED(p) |  DONE(d)')],
+                 children=(HL('First level',
+                              props=[
+                                  ('ID', '02-markup-first-level-id'),
+                                  ('CREATED', DT(2020, 1, 1, 1, 1)),
+                              ],
+                              content=[
+                                  SPAN("  This is a ", BOLD("bold phrase"),
+                                       "."),
+                                  SPAN(""),
+                                  SPAN("  This is a ",
+                                       VERBATIM("verbatim phrase"), "."),
+                                  SPAN(""),
+                                  SPAN("  This is a ", ITALIC("italic phrase"),
+                                       "."),
+                                  SPAN(""),
+                                  SPAN("  This is a ",
+                                       STRIKE("strike-through phrase"), "."),
+                                  SPAN(""),
+                                  SPAN("  This is a ",
+                                       UNDERLINED("underlined phrase"), "."),
+                                  SPAN(""),
+                                  SPAN("  This is a ", CODE("code phrase"),
+                                       "."),
+                                  SPAN(""),
+                              ])))
+
+        ex.assert_matches(self, doc)
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@@ -2,6 +2,8 @@ import collections
 import unittest
 from datetime import datetime

+from org_dom import get_raw_contents
+

 def timestamp_to_datetime(ts):
    return datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)
@@ -48,13 +50,13 @@ class HL:
        self.children = children

    def assert_matches(self, test_case: unittest.TestCase, doc):
-        test_case.assertEqual(self.title, doc['title'])
+        test_case.assertEqual(self.title, doc.title)

        # Check properties
        if self.props is None:
-            test_case.assertEqual(len(doc['properties']), 0)
+            test_case.assertEqual(len(doc.properties), 0)
        else:
-            doc_props = doc['properties']
+            doc_props = doc.properties
            test_case.assertEqual(len(doc_props), len(self.props))

            for i, prop in enumerate(self.props):
@@ -63,15 +65,84 @@ class HL:
                    test_case.assertEqual(
                        timestamp_to_datetime(doc_props[i].value), prop[1])

-        # @TODO: Check properties
+        if isinstance(self.content, str):
+            test_case.assertEqual(get_raw_contents(doc), self.content)
+        else:
+            test_case.assertEqual(len(doc.contents), len(self.content))
+            for i, content in enumerate(self.content):
+                test_case.assertEqual(get_raw_contents(doc.contents[i]),
+                                      content.to_raw())

        # Check children
        if self.children is None:
-            test_case.assertEqual(len(doc['children']), 0)
+            test_case.assertEqual(len(doc.children), 0)
        else:
-            doc_headlines = doc['children']
+            doc_headlines = doc.children
            test_case.assertEqual(len(doc_headlines), len(self.children),
                                  self.title)

            for i, children in enumerate(self.children):
                children.assert_matches(test_case, doc_headlines[i])
+
+
+class SPAN:
+    def __init__(self, *kwargs):
+        self.contents = kwargs
+
+    def to_raw(self):
+        chunks = []
+        for section in self.contents:
+            if isinstance(section, str):
+                chunks.append(section)
+            else:
+                chunks.append(section.to_raw())
+
+        return ''.join(chunks)
+
+
+class BOLD:
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        return '*{}*'.format(self.text)
+
+
+class CODE:
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        return '~{}~'.format(self.text)
+
+
+class ITALIC:
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        return '/{}/'.format(self.text)
+
+
+class STRIKE:
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        return '+{}+'.format(self.text)
+
+
+class UNDERLINED:
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        return '_{}_'.format(self.text)
+
+
+class VERBATIM:
+    def __init__(self, text):
+        self.text = text
+
+    def to_raw(self):
+        return '={}='.format(self.text)