Pass markup tests.

2020-10-10 00:39:32 +02:00 · 2020-10-10 00:39:32 +02:00 · 5b886e5e24
commit 5b886e5e24
parent 0dab7e4703
4 changed files with 230 additions and 22 deletions
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -109,28 +109,187 @@ def timestamp_to_string(ts):
    else:
        return '[{}]'.format(base)

+
+class Line:
+    def __init__(self, linenum, contents):
+        self.linenum = linenum
+        self.contents = contents
+
+    def get_raw(self):
+        rawchunks = []
+        for chunk in self.contents:
+            if isinstance(chunk, str):
+                rawchunks.append(chunk)
+            else:
+                rawchunks.append(chunk.get_raw())
+        return ''.join(rawchunks) + '\n'
+
+class Text:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return raw
+
+class Bold:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"*{raw}*"
+
+class Code:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"~{raw}~"
+
+class Italic:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"/{raw}/"
+
+class Strike:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"+{raw}+"
+
+class Underlined:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"_{raw}_"
+
+class Verbatim:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"={raw}="
+
+
+def parse_contents(raw_contents:List[RawLine]):
+    NO_MODE = 0
+    BOLD_MODE = 1
+    CODE_MODE = 2
+    ITALIC_MODE = 3
+    STRIKE_MODE = 4
+    UNDERLINED_MODE = 5
+    VERBATIM_MODE = 6
+
+    MODE_CLASS = {
+        NO_MODE: Line,
+        BOLD_MODE: Bold,
+        CODE_MODE: Code,
+        ITALIC_MODE: Italic,
+        STRIKE_MODE: Strike,
+        UNDERLINED_MODE: Underlined,
+        VERBATIM_MODE: Verbatim,
+    }
+
+    mode = NO_MODE
+    escaped = False
+
+    chunk = []
+    inline = []
+    chunks = []
+
+    linenum = start_linenum = raw_contents[0].linenum
+    contents_buff = []
+    for line in raw_contents:
+        contents_buff.append(line.line)
+
+    contents = '\n'.join(contents_buff)
+
+    for c in contents:
+        if mode == NO_MODE:
+            if escaped:
+                chunk.append(c)
+                escaped = False
+
+            elif c == '\\':
+                escaped = True
+            elif c == '*':
+                mode = BOLD_MODE
+            elif c == '~':
+                mode = CODE_MODE
+            elif c == '/':
+                mode = ITALIC_MODE
+            elif c == '+':
+                mode = STRIKE_MODE
+            elif c == '_':
+                mode = UNDERLINED_MODE
+            elif c == '=':
+                mode = VERBATIM_MODE
+            elif c == '\n':
+                chunks.append(Line(linenum, inline + [Text(chunk)]))
+                chunk = []
+                inline = []
+            else:
+                chunk.append(c)
+
+            if mode != NO_MODE:
+                inline.append(Text([''.join(chunk)]))
+                chunk = []
+        else:
+            if escaped:
+                chunk.append(c)
+                escaped = False
+
+            was_mode = mode
+            if mode == BOLD_MODE and c == '*':
+                mode = NO_MODE
+            elif mode == CODE_MODE and c == '~':
+                mode = NO_MODE
+            elif mode == ITALIC_MODE and c == '/':
+                mode = NO_MODE
+            elif mode == STRIKE_MODE and c == '+':
+                mode = NO_MODE
+            elif mode == UNDERLINED_MODE and c == '_':
+                mode = NO_MODE
+            elif mode == VERBATIM_MODE and c == '=':
+                mode = NO_MODE
+            elif c == '\n':
+                raise NotImplementedError("[{} | {}]".format(c, chunk))
+            else:
+                chunk.append(c)
+
+            if mode == NO_MODE:
+                inline.append(MODE_CLASS[was_mode](''.join(chunk)))
+                chunk = []
+
+    assert(len(chunk) == 0)
+    assert(len(inline) == 0)
+
+    return chunks
+
 def parse_headline(hl) -> Headline:
-    # 'linenum': linenum,
-    # 'orig': match,
-    # 'title': match.group('line'),
-    # 'contents': [],
-    # 'children': [],
-    # 'keywords': [],
-    # 'properties': [],
-    # 'structural': [],
-    # HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
    stars = hl['orig'].group('stars')
    depth = len(stars)

    # TODO: Parse line for priority, cookies and tags
    line = hl['orig'].group('line')
    title = line.strip()
+    contents = parse_contents(hl['contents'])

    return Headline(start_line=hl['linenum'],
                    depth=depth,
                    orig=hl['orig'],
                    title=title,
-                    contents=hl['contents'],
+                    contents=contents,
                    children=[parse_headline(child) for child in hl['children']],
                    keywords=hl['keywords'],
                    properties=hl['properties'],
@@ -191,8 +350,11 @@ class OrgDom:
            value=value,
        ))

-    def dump_contents(self, raw: RawLine):
-        return (raw.linenum, raw.line)
+    def dump_contents(self, raw):
+        if isinstance(raw, RawLine):
+            return (raw.linenum, raw.line)
+        else:
+            return (raw.linenum, raw.get_raw())

    def dump_structural(self, structural: Tuple):
        return (structural[0], structural[1])
@@ -227,18 +389,21 @@ class OrgDom:

            if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
                # No structural opening
-                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:')
+                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
                logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
            elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
                # No structural closing
                last_line = lines[i - 1][1][1]
-                structured_lines.append(' ' * last_line.index(':') + ':END:')
+                structured_lines.append(' ' * last_line.index(':') + ':END:\n')
                logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))

+            elif ltype != CONTENT_T:
+                content = content + '\n'
+
            last_type = ltype
            structured_lines.append(content)

-        yield from structured_lines
+        yield ''.join(structured_lines)

        for child in headline.children:
            yield from self.dump_headline(child)
@@ -372,5 +537,7 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):


 def dumps(doc):
-    result = '\n'.join(doc.dump())
+    dump = list(doc.dump())
+    result = '\n'.join(dump)
+    print(result)
    return result
--- a/org_dom/utils.py
+++ b/org_dom/utils.py
@@ -1,4 +1,4 @@
-from .org_dom import Headline, RawLine
+from .org_dom import Headline, Line, RawLine


 def get_hl_raw_contents(doc: Headline) -> str:
@@ -7,16 +7,25 @@ def get_hl_raw_contents(doc: Headline) -> str:
    for content in doc.contents:
        lines.append(get_raw_contents(content))

-    return '\n'.join(lines)
+    raw = ''.join(lines)
+    return raw


 def get_rawline_contents(doc: RawLine) -> str:
    return doc.line


+def get_span_contents(doc: Line) -> str:
+    return doc.get_raw()
+
+
 def get_raw_contents(doc) -> str:
    if isinstance(doc, Headline):
        return get_hl_raw_contents(doc)
    if isinstance(doc, RawLine):
        return get_rawline_contents(doc)
+    if isinstance(doc, Line):
+        return get_span_contents(doc)
+    if isinstance(doc, list):
+        return ''.join([get_raw_contents(chunk) for chunk in doc])
    raise NotImplementedError('Unhandled type: ' + str(doc))
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -76,7 +76,6 @@ class TestSerde(unittest.TestCase):
                                  SPAN(""),
                                  SPAN("  This is a ", CODE("code phrase"),
                                       "."),
-                                  SPAN(""),
                              ])))

        ex.assert_matches(self, doc)
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@@ -2,7 +2,7 @@ import collections
 import unittest
 from datetime import datetime

-from org_dom import get_raw_contents
+from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents


 def timestamp_to_datetime(ts):
@@ -68,10 +68,12 @@ class HL:
        if isinstance(self.content, str):
            test_case.assertEqual(get_raw_contents(doc), self.content)
        else:
+            if len(doc.contents) != len(self.content):
+                print("Contents:", doc.contents)
+                print("Expected:", self.content)
            test_case.assertEqual(len(doc.contents), len(self.content))
            for i, content in enumerate(self.content):
-                test_case.assertEqual(get_raw_contents(doc.contents[i]),
-                                      content.to_raw())
+                content.assert_matches(test_case, doc.contents[i])

        # Check children
        if self.children is None:
@@ -99,6 +101,16 @@ class SPAN:

        return ''.join(chunks)

+    def assert_matches(self, test_case, doc):
+        if not isinstance(doc, Line):
+            return False
+        for i, section in enumerate(self.contents):
+            if isinstance(section, str):
+                test_case.assertTrue(isinstance(doc.contents[i], Text))
+                test_case.assertEqual(section, doc.contents[i].get_raw())
+            else:
+                section.assertEqual(test_case, doc.contents[i])
+

 class BOLD:
    def __init__(self, text):
@@ -107,6 +119,10 @@ class BOLD:
    def to_raw(self):
        return '*{}*'.format(self.text)

+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Bold))
+        test_case.assertEqual(self.text, other.contents)
+

 class CODE:
    def __init__(self, text):
@@ -115,6 +131,9 @@ class CODE:
    def to_raw(self):
        return '~{}~'.format(self.text)

+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Code))
+        test_case.assertEqual(self.text, other.contents)

 class ITALIC:
    def __init__(self, text):
@@ -123,6 +142,9 @@ class ITALIC:
    def to_raw(self):
        return '/{}/'.format(self.text)

+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Italic))
+        test_case.assertEqual(self.text, other.contents)

 class STRIKE:
    def __init__(self, text):
@@ -131,6 +153,10 @@ class STRIKE:
    def to_raw(self):
        return '+{}+'.format(self.text)

+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Strike))
+        test_case.assertEqual(self.text, other.contents)
+

 class UNDERLINED:
    def __init__(self, text):
@@ -139,6 +165,9 @@ class UNDERLINED:
    def to_raw(self):
        return '_{}_'.format(self.text)

+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Underlined))
+        test_case.assertEqual(self.text, other.contents)

 class VERBATIM:
    def __init__(self, text):
@@ -146,3 +175,7 @@ class VERBATIM:

    def to_raw(self):
        return '={}='.format(self.text)
+
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Verbatim))
+        test_case.assertEqual(self.text, other.contents)