From a5bfeadfebdfbdda64bdf621dfb0f2b7b397d8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Fri, 4 Dec 2020 00:04:56 +0100 Subject: [PATCH] (In progress) Add support for SRC code blocks. - Add tests for blocks. - Add Regexps. - Correctly handle Headlines with split contents. --- org_dom/org_dom.py | 105 +++++++++++++++++++++++++++++++++++++-------- tests/04-code.org | 34 +++++++++++++++ tests/test_dom.py | 42 ++++++++++++------ 3 files changed, 150 insertions(+), 31 deletions(-) create mode 100644 tests/04-code.org diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py index eef8f63..500d078 100644 --- a/org_dom/org_dom.py +++ b/org_dom/org_dom.py @@ -52,7 +52,7 @@ PROPERTY_DRAWER_RE = re.compile( LOGBOOK_DRAWER_RE = re.compile( r"^(?P\s*):LOGBOOK:(?P\s*)$" ) -DRAWER_END_RE = re.compile(r"^(?P\s*):END:(?P\s*)$") +DRAWER_END_RE = re.compile(r"^(?P\s*):END:(?P\s*)$", re.I) NODE_PROPERTIES_RE = re.compile( r"^(?P\s*):(?P[^+:]+)(?P\+)?:(?P\s*)(?P.*)$" ) @@ -62,6 +62,12 @@ BASE_TIME_STAMP_RE = r"(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P[ ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE)) INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE)) +# Org-Babel +BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(\s+(?P.*))?$") +END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$") +RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$") + + # BASE_TIME_RANGE_RE = (r'(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P[^ ]+)((?P\d{1,2}):(?P\d{1,2}))?', # r'(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P[^ ]+)((?P\d{1,2}):(?P\d{1,2}))?') @@ -552,6 +558,35 @@ def parse_contents(raw_contents: List[RawLine]): if len(raw_contents) == 0: return [] + blocks = [] + current_block = [] + + for line in raw_contents: + if len(current_block) == 0: + # Seed the first block + current_line = line.linenum + current_block.append(line) + else: + if line.linenum == current_line + 1: + # Continue with the current block + current_line = line.linenum + current_block.append(line) + else: + # Mark the finishing block as not the last line + current_block.append(RawLine(current_line + 1, '')) + # Split the blocks + blocks.append(current_block) + current_line = line.linenum + current_block = [line] + + # Check that the current block is not left behind + if len(current_block) > 0: + blocks.append(current_block) + + return [parse_content_block(block) for block in blocks] + + +def parse_content_block(raw_contents: List[RawLine]): contents_buff = [] for line in raw_contents: contents_buff.append(line.line) @@ -576,7 +611,7 @@ def parse_contents(raw_contents: List[RawLine]): elif tok_type == TOKEN_TYPE_CLOSE_LINK: contents.append(LinkToken(LinkTokenType.CLOSE)) - return [Text(contents, current_line)] + return Text(contents, current_line) def parse_headline(hl) -> Headline: @@ -776,6 +811,7 @@ class OrgDomReader: "children": [], "keywords": [], "properties": [], + "results": [], # TODO: Move to each specific code block "logbook": [], "structural": [], } @@ -813,10 +849,28 @@ class OrgDomReader: else: self.headline_hierarchy[-1]["contents"].append(raw) + def add_begin_src_line(self, linenum: int, match: re.Match) -> int: + raw = RawLine(linenum, match.group(0)) + if len(self.headline_hierarchy) == 0: + self.contents.append(raw) + else: + self.headline_hierarchy[-1]["contents"].append(raw) + + def add_end_src_line(self, linenum: int, match: re.Match) -> int: + raw = RawLine(linenum, match.group(0)) + if len(self.headline_hierarchy) == 0: + self.contents.append(raw) + else: + self.headline_hierarchy[-1]["contents"].append(raw) + def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int: self.current_drawer = self.headline_hierarchy[-1]["properties"] self.headline_hierarchy[-1]["structural"].append((linenum, line)) + def add_results_drawer_line(self, linenum: int, line: str, match: re.Match) -> int: + self.current_drawer = self.headline_hierarchy[-1]["results"] + self.headline_hierarchy[-1]["structural"].append((linenum, line)) + def add_logbook_drawer_line(self, linenum: int, line: str, match: re.Match) -> int: self.current_drawer = self.headline_hierarchy[-1]["logbook"] self.headline_hierarchy[-1]["structural"].append((linenum, line)) @@ -843,25 +897,40 @@ class OrgDomReader: def read(self, s, environment): lines = s.split("\n") + line_count = len(lines) reader = enumerate(lines) for linenum, line in reader: - if m := RAW_LINE_RE.match(line): - self.add_raw_line(linenum, line) - elif m := HEADLINE_RE.match(line): - self.add_headline(linenum, m) - elif m := KEYWORDS_RE.match(line): - self.add_keyword_line(linenum, m) - elif m := PROPERTY_DRAWER_RE.match(line): - self.add_property_drawer_line(linenum, line, m) - elif m := LOGBOOK_DRAWER_RE.match(line): - self.add_logbook_drawer_line(linenum, line, m) - elif m := DRAWER_END_RE.match(line): - self.add_drawer_end_line(linenum, line, m) - elif m := NODE_PROPERTIES_RE.match(line): - self.add_node_properties_line(linenum, m) - else: - raise NotImplementedError("{}: ‘{}’".format(linenum, line)) + try: + last_line = linenum + 1 == line_count + + if m := RAW_LINE_RE.match(line): + self.add_raw_line(linenum, line) + elif m := HEADLINE_RE.match(line): + self.add_headline(linenum, m) + elif m := KEYWORDS_RE.match(line): + self.add_keyword_line(linenum, m) + elif m := PROPERTY_DRAWER_RE.match(line): + self.add_property_drawer_line(linenum, line, m) + elif m := LOGBOOK_DRAWER_RE.match(line): + self.add_logbook_drawer_line(linenum, line, m) + elif m := DRAWER_END_RE.match(line): + self.add_drawer_end_line(linenum, line, m) + elif m := RESULTS_DRAWER_RE.match(line): + self.add_results_drawer_line(linenum, line, m) + elif m := NODE_PROPERTIES_RE.match(line): + self.add_node_properties_line(linenum, m) + # Org-babel + elif m := BEGIN_SRC_RE.match(line): + self.add_begin_src_line(linenum, m) + elif m := END_SRC_RE.match(line): + self.add_end_src_line(linenum, m) + # Not captured + else: + self.add_raw_line(linenum, line) + except: + logging.error("Error line {}: {}".format(linenum + 1, line)) + raise def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True): diff --git a/tests/04-code.org b/tests/04-code.org new file mode 100644 index 0000000..35f4f78 --- /dev/null +++ b/tests/04-code.org @@ -0,0 +1,34 @@ +#+TITLE: 04-Code +#+DESCRIPTION: Simple org file +#+TODO: TODO(t) PAUSED(p) | DONE(d) + + +* First Item +:PROPERTIES: +:ID: 04-code-first-item-id +:CREATED: [2020-01-01 Wed 01:01] +:END: + +#+BEGIN_SRC shell +echo "This is a test" +exit 0 # Exit successfully +#+END_SRC + +#+RESULTS: +: This is a test + +* Second item + :PROPERTIES: + :ID: 04-code-second-item-id + :CREATED: [2020-01-01 Wed 01:01] + :END: + + #+BEGIN_SRC shell :results drawer + echo "This is another test" + exit 0 # Comment + #+END_SRC + + #+RESULTS: + :results: + This is another test + :end: diff --git a/tests/test_dom.py b/tests/test_dom.py index 64433e6..610787b 100644 --- a/tests/test_dom.py +++ b/tests/test_dom.py @@ -5,19 +5,8 @@ from datetime import datetime as DT from org_dom import dumps, load, loads -from utils.dom_assertions import ( - BOLD, - CODE, - HL, - ITALIC, - SPAN, - STRIKE, - UNDERLINED, - VERBATIM, - WEB_LINK, - Dom, - Tokens, -) +from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE, + UNDERLINED, VERBATIM, WEB_LINK, Dom, Tokens) DIR = os.path.dirname(os.path.abspath(__file__)) @@ -255,3 +244,30 @@ class TestSerde(unittest.TestCase): ) ex.assert_matches(self, doc) + + def test_mimic_write_file_04(self): + with open(os.path.join(DIR, "04-code.org")) as f: + orig = f.read() + doc = loads(orig) + + self.assertEqual(dumps(doc), orig) + + def test_code_file_04(self): + with open(os.path.join(DIR, "04-code.org")) as f: + doc = load(f) + + snippets = list(doc.get_code_snippets()) + self.assertEqual(len(snippets), 2) + self.assertEqual( + snippets[0].content, + 'echo "This is a test"\n' + "exit 0 # Exit successfully", + ) + self.assertEqual( + snippets[0].result, + "This is a test", + ) + + self.assertEqual( + snippets[1].content, 'echo "This is another test"\n' + "exit 0 # Comment" + ) + self.assertEqual(snippets[1].result, "This is another test")