From 0dab7e4703f8a238897ac520caa575250a87fadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 27 Jun 2020 19:20:34 +0200 Subject: [PATCH] Add base support for markup tests. --- org_dom/__init__.py | 1 + org_dom/org_dom.py | 60 ++++++++++++++++++++----- org_dom/utils.py | 22 ++++++++++ tests/02-markup.org | 21 +++++++++ tests/test_dom.py | 44 +++++++++++++++++-- tests/utils/dom_assertions.py | 83 ++++++++++++++++++++++++++++++++--- 6 files changed, 209 insertions(+), 22 deletions(-) create mode 100644 org_dom/utils.py create mode 100644 tests/02-markup.org diff --git a/org_dom/__init__.py b/org_dom/__init__.py index 522a603..28c7ee6 100644 --- a/org_dom/__init__.py +++ b/org_dom/__init__.py @@ -1 +1,2 @@ from .org_dom import * +from .utils import * diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py index a4af677..68f8d80 100644 --- a/org_dom/org_dom.py +++ b/org_dom/org_dom.py @@ -55,12 +55,14 @@ INACTIVE_TIME_STAMP_RE = re.compile(r'\[{}\]'.format(BASE_TIME_STAMP_RE)) # r'(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P[^ ]+)((?P\d{1,2}):(?P\d{1,2}))?') Headline = collections.namedtuple('Headline', ('start_line', 'depth', - 'keyword_start', 'keyword', + 'orig', + 'properties', 'keywords', 'priority_start', 'priority', 'title_start', 'title', 'tags_start', 'tags', - 'content', + 'contents', 'children', + 'structural', )) RawLine = collections.namedtuple('RawLine', ('linenum', 'line')) @@ -107,9 +109,43 @@ def timestamp_to_string(ts): else: return '[{}]'.format(base) +def parse_headline(hl) -> Headline: + # 'linenum': linenum, + # 'orig': match, + # 'title': match.group('line'), + # 'contents': [], + # 'children': [], + # 'keywords': [], + # 'properties': [], + # 'structural': [], + # HEADLINE_RE = re.compile(r'^(?P\*+) (?P\s*)(?P.*)$') + stars = hl['orig'].group('stars') + depth = len(stars) + + # TODO: Parse line for priority, cookies and tags + line = hl['orig'].group('line') + title = line.strip() + + return Headline(start_line=hl['linenum'], + depth=depth, + orig=hl['orig'], + title=title, + contents=hl['contents'], + children=[parse_headline(child) for child in hl['children']], + keywords=hl['keywords'], + properties=hl['properties'], + structural=hl['structural'], + title_start=None, + priority=None, + priority_start=None, + tags_start=None, + tags=None, + ) + + class OrgDom: def __init__(self, headlines, keywords, contents): - self.headlines: List[Headline] = headlines + self.headlines: List[Headline] = list(map(parse_headline, headlines)) self.keywords: List[Property] = keywords self.contents: List[RawLine] = contents @@ -162,23 +198,23 @@ class OrgDom: return (structural[0], structural[1]) def dump_headline(self, headline): - yield headline['orig'].group('stars') + ' ' + headline['orig'].group('spacing') + headline['orig'].group('line') + yield '*' * headline.depth + ' ' + headline.orig.group('spacing') + headline.title lines = [] KW_T = 0 CONTENT_T = 1 PROPERTIES_T = 2 STRUCTURAL_T = 3 - for keyword in headline['keywords']: + for keyword in headline.keywords: lines.append((KW_T, self.dump_kw(keyword))) - for content in headline['contents']: + for content in headline.contents: lines.append((CONTENT_T, self.dump_contents(content))) - for prop in headline['properties']: + for prop in headline.properties: lines.append((PROPERTIES_T, self.dump_property(prop))) - for struct in headline['structural']: + for struct in headline.structural: lines.append((STRUCTURAL_T, self.dump_structural(struct))) lines = sorted(lines, key=lambda x: x[1][0]) @@ -204,7 +240,7 @@ class OrgDom: yield from structured_lines - for child in headline['children']: + for child in headline.children: yield from self.dump_headline(child) def dump(self): @@ -235,7 +271,7 @@ class OrgDomReader: def add_headline(self, linenum: int, match: re.Match) -> int: # Position reader on the proper headline stars = match.group('stars') - depth = len(stars) - 1 + depth = len(stars) headline = { 'linenum': linenum, @@ -248,13 +284,13 @@ class OrgDomReader: 'structural': [], } - while (depth - 1) > len(self.headline_hierarchy): + while (depth - 2) > len(self.headline_hierarchy): # Introduce structural headlines self.headline_hierarchy.append(None) while depth < len(self.headline_hierarchy): self.headline_hierarchy.pop() - if depth == 0: + if depth == 1: self.headlines.append(headline) else: self.headline_hierarchy[-1]['children'].append(headline) diff --git a/org_dom/utils.py b/org_dom/utils.py new file mode 100644 index 0000000..8be37ad --- /dev/null +++ b/org_dom/utils.py @@ -0,0 +1,22 @@ +from .org_dom import Headline, RawLine + + +def get_hl_raw_contents(doc: Headline) -> str: + lines = [] + + for content in doc.contents: + lines.append(get_raw_contents(content)) + + return '\n'.join(lines) + + +def get_rawline_contents(doc: RawLine) -> str: + return doc.line + + +def get_raw_contents(doc) -> str: + if isinstance(doc, Headline): + return get_hl_raw_contents(doc) + if isinstance(doc, RawLine): + return get_rawline_contents(doc) + raise NotImplementedError('Unhandled type: ' + str(doc)) diff --git a/tests/02-markup.org b/tests/02-markup.org new file mode 100644 index 0000000..18d58f5 --- /dev/null +++ b/tests/02-markup.org @@ -0,0 +1,21 @@ +#+TITLE: 02-Markup +#+DESCRIPTION: Simple org file to test markup +#+TODO: TODO(t) PAUSED(p) | DONE(d) + + +* First level + :PROPERTIES: + :ID: 02-markup-first-level-id + :CREATED: [2020-01-01 Wed 01:01] + :END: + This is a *bold phrase*. + + This is a =verbatim phrase=. + + This is a /italic phrase/. + + This is a +strike-through phrase+. + + This is a _underlined phrase_. + + This is a ~code phrase~. diff --git a/tests/test_dom.py b/tests/test_dom.py index bef59ec..6659a78 100644 --- a/tests/test_dom.py +++ b/tests/test_dom.py @@ -4,7 +4,8 @@ import unittest from datetime import datetime as DT from org_dom import dumps, load, loads -from utils.dom_assertions import HL, Dom +from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE, + UNDERLINED, VERBATIM, Dom) DIR = os.path.dirname(os.path.abspath(__file__)) @@ -23,15 +24,15 @@ class TestSerde(unittest.TestCase): ('ID', '01-simple-first-level-id'), ('CREATED', DT(2020, 1, 1, 1, 1)), ], - content='First level content', + content=' First level content\n', children=[ HL('Second level', props=[('ID', '01-simple-second-level-id')], - content='Second level content', + content='\n Second level content\n', children=[ HL('Third level', props=[('ID', '01-simple-third-level-id')], - content='Third level content') + content='\n Third level content\n') ]) ]))) @@ -44,3 +45,38 @@ class TestSerde(unittest.TestCase): doc = loads(orig) self.assertEqual(dumps(doc), orig) + + def test_markup_file_02(self): + with open(os.path.join(DIR, '02-markup.org')) as f: + doc = load(f) + + ex = Dom(props=[('TITLE', '02-Markup'), + ('DESCRIPTION', 'Simple org file to test markup'), + ('TODO', 'TODO(t) PAUSED(p) | DONE(d)')], + children=(HL('First level', + props=[ + ('ID', '02-markup-first-level-id'), + ('CREATED', DT(2020, 1, 1, 1, 1)), + ], + content=[ + SPAN(" This is a ", BOLD("bold phrase"), + "."), + SPAN(""), + SPAN(" This is a ", + VERBATIM("verbatim phrase"), "."), + SPAN(""), + SPAN(" This is a ", ITALIC("italic phrase"), + "."), + SPAN(""), + SPAN(" This is a ", + STRIKE("strike-through phrase"), "."), + SPAN(""), + SPAN(" This is a ", + UNDERLINED("underlined phrase"), "."), + SPAN(""), + SPAN(" This is a ", CODE("code phrase"), + "."), + SPAN(""), + ]))) + + ex.assert_matches(self, doc) diff --git a/tests/utils/dom_assertions.py b/tests/utils/dom_assertions.py index 3562687..c69ad09 100644 --- a/tests/utils/dom_assertions.py +++ b/tests/utils/dom_assertions.py @@ -2,6 +2,8 @@ import collections import unittest from datetime import datetime +from org_dom import get_raw_contents + def timestamp_to_datetime(ts): return datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute) @@ -48,13 +50,13 @@ class HL: self.children = children def assert_matches(self, test_case: unittest.TestCase, doc): - test_case.assertEqual(self.title, doc['title']) + test_case.assertEqual(self.title, doc.title) # Check properties if self.props is None: - test_case.assertEqual(len(doc['properties']), 0) + test_case.assertEqual(len(doc.properties), 0) else: - doc_props = doc['properties'] + doc_props = doc.properties test_case.assertEqual(len(doc_props), len(self.props)) for i, prop in enumerate(self.props): @@ -63,15 +65,84 @@ class HL: test_case.assertEqual( timestamp_to_datetime(doc_props[i].value), prop[1]) - # @TODO: Check properties + if isinstance(self.content, str): + test_case.assertEqual(get_raw_contents(doc), self.content) + else: + test_case.assertEqual(len(doc.contents), len(self.content)) + for i, content in enumerate(self.content): + test_case.assertEqual(get_raw_contents(doc.contents[i]), + content.to_raw()) # Check children if self.children is None: - test_case.assertEqual(len(doc['children']), 0) + test_case.assertEqual(len(doc.children), 0) else: - doc_headlines = doc['children'] + doc_headlines = doc.children test_case.assertEqual(len(doc_headlines), len(self.children), self.title) for i, children in enumerate(self.children): children.assert_matches(test_case, doc_headlines[i]) + + +class SPAN: + def __init__(self, *kwargs): + self.contents = kwargs + + def to_raw(self): + chunks = [] + for section in self.contents: + if isinstance(section, str): + chunks.append(section) + else: + chunks.append(section.to_raw()) + + return ''.join(chunks) + + +class BOLD: + def __init__(self, text): + self.text = text + + def to_raw(self): + return '*{}*'.format(self.text) + + +class CODE: + def __init__(self, text): + self.text = text + + def to_raw(self): + return '~{}~'.format(self.text) + + +class ITALIC: + def __init__(self, text): + self.text = text + + def to_raw(self): + return '/{}/'.format(self.text) + + +class STRIKE: + def __init__(self, text): + self.text = text + + def to_raw(self): + return '+{}+'.format(self.text) + + +class UNDERLINED: + def __init__(self, text): + self.text = text + + def to_raw(self): + return '_{}_'.format(self.text) + + +class VERBATIM: + def __init__(self, text): + self.text = text + + def to_raw(self): + return '={}='.format(self.text)