Pass markup tests.

2020-10-10 00:39:32 +02:00 · 2020-10-10 00:39:32 +02:00 · 5b886e5e24
commit 5b886e5e24
parent 0dab7e4703
4 changed files with 230 additions and 22 deletions
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@ -109,28 +109,187 @@ def timestamp_to_string(ts):
    else:
        return '[{}]'.format(base)
 class Line:
    """One parsed content line: a line number plus its ordered inline pieces.

    Every entry of ``contents`` is either a plain ``str`` or an object that
    exposes ``get_raw()``.
    """
    def __init__(self, linenum, contents):
        self.contents = contents
        self.linenum = linenum
    def get_raw(self):
        """Rebuild the text of the line, terminated by a newline."""
        pieces = (
            piece if isinstance(piece, str) else piece.get_raw()
            for piece in self.contents
        )
        return '{}\n'.format(''.join(pieces))
 class Text:
    """Run of plain text that carries no markup markers."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined pieces of ``contents`` with nothing added."""
        return ''.join(self.contents)
 class Bold:
    """Inline span that serialises as ``*text*``."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined contents wrapped in ``*`` markers."""
        marker = '*'
        return marker + ''.join(self.contents) + marker
 class Code:
    """Inline span that serialises as ``~text~``."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined contents wrapped in ``~`` markers."""
        marker = '~'
        return marker + ''.join(self.contents) + marker
 class Italic:
    """Inline span that serialises as ``/text/``."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined contents wrapped in ``/`` markers."""
        marker = '/'
        return marker + ''.join(self.contents) + marker
 class Strike:
    """Inline span that serialises as ``+text+``."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined contents wrapped in ``+`` markers."""
        marker = '+'
        return marker + ''.join(self.contents) + marker
 class Underlined:
    """Inline span that serialises as ``_text_``."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined contents wrapped in ``_`` markers."""
        marker = '_'
        return marker + ''.join(self.contents) + marker
 class Verbatim:
    """Inline span that serialises as ``=text=``."""
    def __init__(self, contents):
        self.contents = contents
    def get_raw(self):
        """Return the joined contents wrapped in ``=`` markers."""
        marker = '='
        return marker + ''.join(self.contents) + marker
 def parse_contents(raw_contents:List[RawLine]):
    """Turn raw content lines into ``Line`` objects with inline markup spans.

    The ``line`` text of every raw line is joined with newlines and scanned
    one character at a time.  Outside a span, a backslash makes the next
    character literal, and one of ``* ~ / + _ =`` opens a ``Bold``, ``Code``,
    ``Italic``, ``Strike``, ``Underlined`` or ``Verbatim`` span.  Inside a
    span only the matching marker is special (it closes the span); every
    other character, backslash included, is kept as-is.

    Args:
        raw_contents: raw lines exposing ``linenum`` and ``line``.

    Returns:
        A list with one ``Line`` per newline found in the joined text, each
        numbered with the ``linenum`` of the raw line it starts on.  An empty
        input yields an empty list.

    Raises:
        NotImplementedError: a span is still open when a newline is reached.
        AssertionError: text or inline pieces are left after the last
            newline, i.e. the final raw line is not empty.
            NOTE(review): trailing text is therefore never emitted as a
            ``Line``; confirm this is intended.
    """
    # A headline without body text hands over an empty list: nothing to scan.
    if not raw_contents:
        return []

    # Marker character -> class wrapping the text found between two markers.
    span_types = {
        '*': Bold,
        '~': Code,
        '/': Italic,
        '+': Strike,
        '_': Underlined,
        '=': Verbatim,
    }
    line_numbers = [raw.linenum for raw in raw_contents]
    contents = '\n'.join(raw.line for raw in raw_contents)

    raw_index = 0               # raw line the scanner is currently inside
    linenum = line_numbers[0]   # raw line number the pending Line started on
    open_marker = None          # marker of the span being read; None outside
    escaped = False
    chunk = []                  # characters gathered since the last boundary
    inline = []                 # finished pieces of the Line being built
    chunks = []                 # finished Lines
    for c in contents:
        if open_marker is None:
            if escaped:
                chunk.append(c)
                escaped = False
                if c == '\n':
                    # An escaped newline merges two raw lines into one Line.
                    raw_index += 1
            elif c == '\\':
                escaped = True
            elif c in span_types:
                # Text seen before the marker becomes its own (maybe empty) piece.
                open_marker = c
                inline.append(Text([''.join(chunk)]))
                chunk = []
            elif c == '\n':
                chunks.append(Line(linenum, inline + [Text(chunk)]))
                chunk = []
                inline = []
                # Number the next Line after the raw line it comes from rather
                # than reusing the first line's number for every Line.
                raw_index += 1
                if raw_index < len(line_numbers):
                    linenum = line_numbers[raw_index]
            else:
                chunk.append(c)
        elif c == open_marker:
            inline.append(span_types[open_marker](''.join(chunk)))
            chunk = []
            open_marker = None
        elif c == '\n':
            raise NotImplementedError("[{} | {}]".format(c, chunk))
        else:
            chunk.append(c)
    assert(len(chunk) == 0)
    assert(len(inline) == 0)
    return chunks
 def parse_headline(hl) -> Headline:
    # 'linenum': linenum,
    # 'orig': match,
    # 'title': match.group('line'),
    # 'contents': [],
    # 'children': [],
    # 'keywords': [],
    # 'properties': [],
    # 'structural': [],
    # HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
    stars = hl['orig'].group('stars')
    depth = len(stars)
    # TODO: Parse line for priority, cookies and tags
    line = hl['orig'].group('line')
    title = line.strip()
    contents = parse_contents(hl['contents'])
    return Headline(start_line=hl['linenum'],
                    depth=depth,
                    orig=hl['orig'],
                    title=title,
-                    contents=hl['contents'],
+                    contents=contents,
                    children=[parse_headline(child) for child in hl['children']],
                    keywords=hl['keywords'],
                    properties=hl['properties'],
@ -191,8 +350,11 @@ class OrgDom:
            value=value,
        ))
-    def dump_contents(self, raw: RawLine):
+    def dump_contents(self, raw):
        if isinstance(raw, RawLine):
            return (raw.linenum, raw.line)
        else:
            return (raw.linenum, raw.get_raw())
    def dump_structural(self, structural: Tuple):
        """Return the first two fields of ``structural`` as a 2-tuple."""
        first, second = structural[0], structural[1]
        return (first, second)
@ -227,18 +389,21 @@ class OrgDom:
            if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
                # No structural opening
-                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:')
+                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
                logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
            elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
                # No structural closing
                last_line = lines[i - 1][1][1]
-                structured_lines.append(' ' * last_line.index(':') + ':END:')
+                structured_lines.append(' ' * last_line.index(':') + ':END:\n')
                logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
            elif ltype != CONTENT_T:
                content = content + '\n'
            last_type = ltype
            structured_lines.append(content)
-        yield from structured_lines
+        yield ''.join(structured_lines)
        for child in headline.children:
            yield from self.dump_headline(child)
@ -372,5 +537,7 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
 def dumps(doc):
-    result = '\n'.join(doc.dump())
+    dump = list(doc.dump())
    result = '\n'.join(dump)
    print(result)
    return result
--- a/org_dom/utils.py
+++ b/org_dom/utils.py
@ -1,4 +1,4 @@
-from .org_dom import Headline, RawLine
+from .org_dom import Headline, Line, RawLine
 def get_hl_raw_contents(doc: Headline) -> str:
@ -7,16 +7,25 @@ def get_hl_raw_contents(doc: Headline) -> str:
    for content in doc.contents:
        lines.append(get_raw_contents(content))
-    return '\n'.join(lines)
+    raw = ''.join(lines)
    return raw
 def get_rawline_contents(doc: RawLine) -> str:
    """Return the text stored in the ``line`` field of a raw line."""
    text = doc.line
    return text
 def get_span_contents(doc: Line) -> str:
    """Return the raw text of a parsed line, as produced by its ``get_raw()``."""
    raw = doc.get_raw()
    return raw
 def get_raw_contents(doc) -> str:
    """Return the raw text of a headline, raw line, parsed line, or list of those.

    Lists are flattened recursively and their pieces concatenated.

    Raises:
        NotImplementedError: ``doc`` is none of the supported types.
    """
    extractors = (
        (Headline, get_hl_raw_contents),
        (RawLine, get_rawline_contents),
        (Line, get_span_contents),
    )
    for kind, extract in extractors:
        if isinstance(doc, kind):
            return extract(doc)
    if isinstance(doc, list):
        return ''.join(get_raw_contents(chunk) for chunk in doc)
    raise NotImplementedError('Unhandled type: ' + str(doc))
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@ -76,7 +76,6 @@ class TestSerde(unittest.TestCase):
                                  SPAN(""),
                                  SPAN("  This is a ", CODE("code phrase"),
                                       "."),
                                  SPAN(""),
                              ])))
        ex.assert_matches(self, doc)
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@ -2,7 +2,7 @@ import collections
 import unittest
 from datetime import datetime
-from org_dom import get_raw_contents
+from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents
 def timestamp_to_datetime(ts):
@ -68,10 +68,12 @@ class HL:
        if isinstance(self.content, str):
            test_case.assertEqual(get_raw_contents(doc), self.content)
        else:
            if len(doc.contents) != len(self.content):
                print("Contents:", doc.contents)
                print("Expected:", self.content)
            test_case.assertEqual(len(doc.contents), len(self.content))
            for i, content in enumerate(self.content):
-                test_case.assertEqual(get_raw_contents(doc.contents[i]),
+                content.assert_matches(test_case, doc.contents[i])
                                      content.to_raw())
        # Check children
        if self.children is None:
@ -99,6 +101,16 @@ class SPAN:
        return ''.join(chunks)
    def assert_matches(self, test_case, doc):
        """Assert that a parsed line holds the sections this SPAN expects.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            doc: the parsed object to check; it must be a ``Line``.

        Plain-string sections must match a ``Text`` piece with the same raw
        text; any other section checks its piece through its own
        ``assertEqual(test_case, piece)``.
        """
        # Fail loudly: a returned False would be ignored by the caller and
        # would let an object of the wrong type pass the test.
        test_case.assertIsInstance(doc, Line)
        for i, section in enumerate(self.contents):
            piece = doc.contents[i]
            if isinstance(section, str):
                test_case.assertIsInstance(piece, Text)
                test_case.assertEqual(section, piece.get_raw())
            else:
                section.assertEqual(test_case, piece)
 class BOLD:
    def __init__(self, text):
@ -107,6 +119,10 @@ class BOLD:
    def to_raw(self):
        """Render the expected text wrapped in ``*`` markers."""
        return f"*{self.text}*"
    def assertEqual(self, test_case, other):
        """Assert that ``other`` is a ``Bold`` span holding the expected text.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            other: the parsed piece to check.
        """
        # assertIsInstance names the offending object and the expected class
        # on failure instead of a bare "False is not true".
        test_case.assertIsInstance(other, Bold)
        test_case.assertEqual(self.text, other.contents)
 class CODE:
    def __init__(self, text):
@ -115,6 +131,9 @@ class CODE:
    def to_raw(self):
        """Render the expected text wrapped in ``~`` markers."""
        return f"~{self.text}~"
    def assertEqual(self, test_case, other):
        """Assert that ``other`` is a ``Code`` span holding the expected text.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            other: the parsed piece to check.
        """
        # assertIsInstance names the offending object and the expected class
        # on failure instead of a bare "False is not true".
        test_case.assertIsInstance(other, Code)
        test_case.assertEqual(self.text, other.contents)
 class ITALIC:
    def __init__(self, text):
@ -123,6 +142,9 @@ class ITALIC:
    def to_raw(self):
        """Render the expected text wrapped in ``/`` markers."""
        return f"/{self.text}/"
    def assertEqual(self, test_case, other):
        """Assert that ``other`` is an ``Italic`` span holding the expected text.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            other: the parsed piece to check.
        """
        # assertIsInstance names the offending object and the expected class
        # on failure instead of a bare "False is not true".
        test_case.assertIsInstance(other, Italic)
        test_case.assertEqual(self.text, other.contents)
 class STRIKE:
    def __init__(self, text):
@ -131,6 +153,10 @@ class STRIKE:
    def to_raw(self):
        """Render the expected text wrapped in ``+`` markers."""
        return f"+{self.text}+"
    def assertEqual(self, test_case, other):
        """Assert that ``other`` is a ``Strike`` span holding the expected text.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            other: the parsed piece to check.
        """
        # assertIsInstance names the offending object and the expected class
        # on failure instead of a bare "False is not true".
        test_case.assertIsInstance(other, Strike)
        test_case.assertEqual(self.text, other.contents)
 class UNDERLINED:
    def __init__(self, text):
@ -139,6 +165,9 @@ class UNDERLINED:
    def to_raw(self):
        """Render the expected text wrapped in ``_`` markers."""
        return f"_{self.text}_"
    def assertEqual(self, test_case, other):
        """Assert that ``other`` is an ``Underlined`` span holding the expected text.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            other: the parsed piece to check.
        """
        # assertIsInstance names the offending object and the expected class
        # on failure instead of a bare "False is not true".
        test_case.assertIsInstance(other, Underlined)
        test_case.assertEqual(self.text, other.contents)
 class VERBATIM:
    def __init__(self, text):
@ -146,3 +175,7 @@ class VERBATIM:
    def to_raw(self):
        """Render the expected text wrapped in ``=`` markers."""
        return f"={self.text}="
    def assertEqual(self, test_case, other):
        """Assert that ``other`` is a ``Verbatim`` span holding the expected text.

        Args:
            test_case: the ``unittest.TestCase`` used to report failures.
            other: the parsed piece to check.
        """
        # assertIsInstance names the offending object and the expected class
        # on failure instead of a bare "False is not true".
        test_case.assertIsInstance(other, Verbatim)
        test_case.assertEqual(self.text, other.contents)