From 5b886e5e242928a799d56a1e2cc1ae48a96e6f61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
Date: Sat, 10 Oct 2020 00:39:32 +0200
Subject: [PATCH] Pass markup tests.

---
Review notes (added during review; everything between "---" and the first
"diff --git" line is outside the commit message):
- Line breaks and indentation were restored from a copy whose newlines had
  been collapsed. Blank context lines are inferred, so re-check the hunk
  line counts before feeding this to `git am` / `git apply`.
- NOTE(review): the sender address on the From: line and the group names in
  the removed HEADLINE_RE comment appear to be missing from this copy;
  confirm against the original commit.
- parse_contents(): `linenum` is read once from raw_contents[0] and never
  advanced, so every Line it builds carries the first line's number; an
  empty `raw_contents` raises IndexError.
- parse_contents(): text after the last '\n' is never flushed, so the two
  closing asserts only hold when the final raw line is empty; a newline
  inside a markup span raises NotImplementedError.
- dumps(): the added print(result) writes the whole document to stdout on
  every call; presumably leftover debugging.
- SPAN.assert_matches(): returns False for a non-Line instead of failing
  the test, and the caller in HL ignores the return value.

 org_dom/org_dom.py            | 199 +++++++++++++++++++++++++++++++---
 org_dom/utils.py              |  13 ++-
 tests/test_dom.py             |   1 -
 tests/utils/dom_assertions.py |  39 ++++++-
 4 files changed, 230 insertions(+), 22 deletions(-)

diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py
index 68f8d80..8ff5746 100644
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -109,28 +109,187 @@ def timestamp_to_string(ts):
     else:
         return '[{}]'.format(base)
 
+
+class Line:
+    def __init__(self, linenum, contents):
+        self.linenum = linenum
+        self.contents = contents
+
+    def get_raw(self):
+        rawchunks = []
+        for chunk in self.contents:
+            if isinstance(chunk, str):
+                rawchunks.append(chunk)
+            else:
+                rawchunks.append(chunk.get_raw())
+        return ''.join(rawchunks) + '\n'
+
+class Text:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return raw
+
+class Bold:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"*{raw}*"
+
+class Code:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"~{raw}~"
+
+class Italic:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"/{raw}/"
+
+class Strike:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"+{raw}+"
+
+class Underlined:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"_{raw}_"
+
+class Verbatim:
+    def __init__(self, contents):
+        self.contents = contents
+
+    def get_raw(self):
+        raw = ''.join(self.contents)
+        return f"={raw}="
+
+
+def parse_contents(raw_contents:List[RawLine]):
+    NO_MODE = 0
+    BOLD_MODE = 1
+    CODE_MODE = 2
+    ITALIC_MODE = 3
+    STRIKE_MODE = 4
+    UNDERLINED_MODE = 5
+    VERBATIM_MODE = 6
+
+    MODE_CLASS = {
+        NO_MODE: Line,
+        BOLD_MODE: Bold,
+        CODE_MODE: Code,
+        ITALIC_MODE: Italic,
+        STRIKE_MODE: Strike,
+        UNDERLINED_MODE: Underlined,
+        VERBATIM_MODE: Verbatim,
+    }
+
+    mode = NO_MODE
+    escaped = False
+
+    chunk = []
+    inline = []
+    chunks = []
+
+    linenum = start_linenum = raw_contents[0].linenum
+    contents_buff = []
+    for line in raw_contents:
+        contents_buff.append(line.line)
+
+    contents = '\n'.join(contents_buff)
+
+    for c in contents:
+        if mode == NO_MODE:
+            if escaped:
+                chunk.append(c)
+                escaped = False
+
+            elif c == '\\':
+                escaped = True
+            elif c == '*':
+                mode = BOLD_MODE
+            elif c == '~':
+                mode = CODE_MODE
+            elif c == '/':
+                mode = ITALIC_MODE
+            elif c == '+':
+                mode = STRIKE_MODE
+            elif c == '_':
+                mode = UNDERLINED_MODE
+            elif c == '=':
+                mode = VERBATIM_MODE
+            elif c == '\n':
+                chunks.append(Line(linenum, inline + [Text(chunk)]))
+                chunk = []
+                inline = []
+            else:
+                chunk.append(c)
+
+            if mode != NO_MODE:
+                inline.append(Text([''.join(chunk)]))
+                chunk = []
+        else:
+            if escaped:
+                chunk.append(c)
+                escaped = False
+
+            was_mode = mode
+            if mode == BOLD_MODE and c == '*':
+                mode = NO_MODE
+            elif mode == CODE_MODE and c == '~':
+                mode = NO_MODE
+            elif mode == ITALIC_MODE and c == '/':
+                mode = NO_MODE
+            elif mode == STRIKE_MODE and c == '+':
+                mode = NO_MODE
+            elif mode == UNDERLINED_MODE and c == '_':
+                mode = NO_MODE
+            elif mode == VERBATIM_MODE and c == '=':
+                mode = NO_MODE
+            elif c == '\n':
+                raise NotImplementedError("[{} | {}]".format(c, chunk))
+            else:
+                chunk.append(c)
+
+            if mode == NO_MODE:
+                inline.append(MODE_CLASS[was_mode](''.join(chunk)))
+                chunk = []
+
+    assert(len(chunk) == 0)
+    assert(len(inline) == 0)
+
+    return chunks
+
 
 def parse_headline(hl) -> Headline:
-    # 'linenum': linenum,
-    # 'orig': match,
-    # 'title': match.group('line'),
-    # 'contents': [],
-    # 'children': [],
-    # 'keywords': [],
-    # 'properties': [],
-    # 'structural': [],
-    # HEADLINE_RE = re.compile(r'^(?P\*+) (?P\s*)(?P.*)$')
     stars = hl['orig'].group('stars')
     depth = len(stars)
 
     # TODO: Parse line for priority, cookies and tags
     line = hl['orig'].group('line')
     title = line.strip()
+    contents = parse_contents(hl['contents'])
+
     return Headline(start_line=hl['linenum'],
                     depth=depth,
                     orig=hl['orig'],
                     title=title,
-                    contents=hl['contents'],
+                    contents=contents,
                     children=[parse_headline(child) for child in hl['children']],
                     keywords=hl['keywords'],
                     properties=hl['properties'],
@@ -191,8 +350,11 @@ class OrgDom:
                     value=value,
                 ))
 
-    def dump_contents(self, raw: RawLine):
-        return (raw.linenum, raw.line)
+    def dump_contents(self, raw):
+        if isinstance(raw, RawLine):
+            return (raw.linenum, raw.line)
+        else:
+            return (raw.linenum, raw.get_raw())
 
     def dump_structural(self, structural: Tuple):
         return (structural[0], structural[1])
@@ -227,18 +389,21 @@ class OrgDom:
 
             if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
                 # No structural opening
-                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:')
+                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
                 logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
             elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
                 # No structural closing
                 last_line = lines[i - 1][1][1]
-                structured_lines.append(' ' * last_line.index(':') + ':END:')
+                structured_lines.append(' ' * last_line.index(':') + ':END:\n')
                 logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
 
+            elif ltype != CONTENT_T:
+                content = content + '\n'
+
             last_type = ltype
             structured_lines.append(content)
 
-        yield from structured_lines
+        yield ''.join(structured_lines)
 
         for child in headline.children:
             yield from self.dump_headline(child)
@@ -372,5 +537,7 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
 
 
 def dumps(doc):
-    result = '\n'.join(doc.dump())
+    dump = list(doc.dump())
+    result = '\n'.join(dump)
+    print(result)
     return result
diff --git a/org_dom/utils.py b/org_dom/utils.py
index 8be37ad..e7c3e8c 100644
--- a/org_dom/utils.py
+++ b/org_dom/utils.py
@@ -1,4 +1,4 @@
-from .org_dom import Headline, RawLine
+from .org_dom import Headline, Line, RawLine
 
 
 def get_hl_raw_contents(doc: Headline) -> str:
@@ -7,16 +7,25 @@ def get_hl_raw_contents(doc: Headline) -> str:
     for content in doc.contents:
         lines.append(get_raw_contents(content))
 
-    return '\n'.join(lines)
+    raw = ''.join(lines)
+    return raw
 
 
 def get_rawline_contents(doc: RawLine) -> str:
     return doc.line
 
 
+def get_span_contents(doc: Line) -> str:
+    return doc.get_raw()
+
+
 def get_raw_contents(doc) -> str:
     if isinstance(doc, Headline):
         return get_hl_raw_contents(doc)
     if isinstance(doc, RawLine):
         return get_rawline_contents(doc)
+    if isinstance(doc, Line):
+        return get_span_contents(doc)
+    if isinstance(doc, list):
+        return ''.join([get_raw_contents(chunk) for chunk in doc])
     raise NotImplementedError('Unhandled type: ' + str(doc))
diff --git a/tests/test_dom.py b/tests/test_dom.py
index 6659a78..5b1f8cb 100644
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -76,7 +76,6 @@ class TestSerde(unittest.TestCase):
 
                      SPAN(""),
                      SPAN(" This is a ", CODE("code phrase"), "."),
-                     SPAN(""),
                  ])))
 
         ex.assert_matches(self, doc)
diff --git a/tests/utils/dom_assertions.py b/tests/utils/dom_assertions.py
index c69ad09..9bd77e7 100644
--- a/tests/utils/dom_assertions.py
+++ b/tests/utils/dom_assertions.py
@@ -2,7 +2,7 @@ import collections
 import unittest
 from datetime import datetime
 
-from org_dom import get_raw_contents
+from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents
 
 
 def timestamp_to_datetime(ts):
@@ -68,10 +68,12 @@ class HL:
             if isinstance(self.content, str):
                 test_case.assertEqual(get_raw_contents(doc), self.content)
             else:
+                if len(doc.contents) != len(self.content):
+                    print("Contents:", doc.contents)
+                    print("Expected:", self.content)
                 test_case.assertEqual(len(doc.contents), len(self.content))
                 for i, content in enumerate(self.content):
-                    test_case.assertEqual(get_raw_contents(doc.contents[i]),
-                                          content.to_raw())
+                    content.assert_matches(test_case, doc.contents[i])
 
         # Check children
         if self.children is None:
@@ -99,6 +101,16 @@ class SPAN:
 
         return ''.join(chunks)
 
+    def assert_matches(self, test_case, doc):
+        if not isinstance(doc, Line):
+            return False
+        for i, section in enumerate(self.contents):
+            if isinstance(section, str):
+                test_case.assertTrue(isinstance(doc.contents[i], Text))
+                test_case.assertEqual(section, doc.contents[i].get_raw())
+            else:
+                section.assertEqual(test_case, doc.contents[i])
+
 
 class BOLD:
     def __init__(self, text):
@@ -107,6 +119,10 @@ class BOLD:
     def to_raw(self):
         return '*{}*'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Bold))
+        test_case.assertEqual(self.text, other.contents)
+
 
 class CODE:
     def __init__(self, text):
@@ -115,6 +131,9 @@ class CODE:
     def to_raw(self):
         return '~{}~'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Code))
+        test_case.assertEqual(self.text, other.contents)
 
 class ITALIC:
     def __init__(self, text):
@@ -123,6 +142,9 @@ class ITALIC:
     def to_raw(self):
         return '/{}/'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Italic))
+        test_case.assertEqual(self.text, other.contents)
 
 class STRIKE:
     def __init__(self, text):
@@ -131,6 +153,10 @@ class STRIKE:
     def to_raw(self):
         return '+{}+'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Strike))
+        test_case.assertEqual(self.text, other.contents)
+
 
 class UNDERLINED:
     def __init__(self, text):
@@ -139,6 +165,9 @@ class UNDERLINED:
     def to_raw(self):
         return '_{}_'.format(self.text)
 
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Underlined))
+        test_case.assertEqual(self.text, other.contents)
 
 class VERBATIM:
     def __init__(self, text):
@@ -146,3 +175,7 @@ class VERBATIM:
 
     def to_raw(self):
         return '={}='.format(self.text)
+
+    def assertEqual(self, test_case, other):
+        test_case.assertTrue(isinstance(other, Verbatim))
+        test_case.assertEqual(self.text, other.contents)