Add basic BEGIN_SRC block support.

2020-12-06 00:26:44 +01:00 · 2020-12-06 00:26:44 +01:00 · ffe6f007fc
commit ffe6f007fc
parent a5bfeadfeb
3 changed files with 161 additions and 24 deletions
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@ -63,10 +63,10 @@ ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
 INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))

 # Org-Babel
-BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(\s+(?P<content>.*))?$")
-END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$")
-RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$")
-
+BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(?P<content>.*)$", re.I)
+END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$", re.I)
+RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I)
+CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result"))

 # BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
 #                       r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
@ -122,6 +122,7 @@ class Headline:
        contents,
        children,
        structural,
+        delimiters,
    ):
        self.start_line = start_line
        self.depth = depth
@ -137,11 +138,112 @@ class Headline:
        self.contents = contents
        self.children = children
        self.structural = structural
+        self.delimiters = delimiters

    def get_links(self):
        for content in self.contents:
            yield from get_links_from_content(content)

+    def get_lines_between(self, start, end):
+        for line in self.contents:
+            if start <= line.linenum < end:
+                yield "".join(line.contents)
+
+    def get_element_in_line(self, linenum):
+        for line in self.contents:
+            if linenum == line.linenum:
+                return line
+
+        for (s_lnum, struc) in self.structural:
+            if linenum == s_lnum:
+                return ("structural", struc)
+
+    def get_structural_end_after(self, linenum):
+        for (s_lnum, struc) in self.structural:
+            if s_lnum > linenum and struc.strip().upper() == ":END:":
+                return (s_lnum, struc)
+
+    def get_code_snippets(self):
+        inside_code = False
+
+        sections = []
+
+        for delimiter in self.delimiters:
+            if delimiter.delimiter_type == DelimiterLineType.BEGIN_SRC:
+                line_start = delimiter.linenum
+                inside_code = True
+            elif delimiter.delimiter_type == DelimiterLineType.END_SRC:
+                inside_code = False
+                start, end = line_start, delimiter.linenum
+
+                lines = self.get_lines_between(start + 1, end)
+                contents = "\n".join(lines)
+                if contents.endswith("\n"):
+                    # This is not ideal, but to avoid having to do this maybe
+                    # the content parsing must be re-thinked
+                    contents = contents[:-1]
+
+                sections.append(
+                    {
+                        "line_first": start + 1,
+                        "line_last": end - 1,
+                        "content": contents,
+                    }
+                )
+                line_start = None
+
+        for kword in self.keywords:
+            if kword.key.upper() == "RESULTS":
+                for snippet in sections:
+                    if kword.linenum > snippet["line_last"]:
+                        result_first = self.get_element_in_line(kword.linenum + 1)
+
+                        if isinstance(result_first, Text):
+                            result = "\n".join(result_first.contents)
+                            snippet["result"] = result
+
+                            if result.strip().startswith(": "):
+                                # Split lines and remove ':'
+                                lines = result.split("\n")
+                                s_result = []
+                                for line in lines:
+                                    if ": " not in line:
+                                        break
+                                    s_result.append(line.lstrip(" ")[2:])
+                                snippet["result"] = "\n".join(s_result)
+                        elif (
+                            isinstance(result_first, tuple)
+                            and len(result_first) == 2
+                            and result_first[0] == "structural"
+                            and result_first[1].strip().upper() == ":RESULTS:"
+                        ):
+
+                            (end_line, _) = self.get_structural_end_after(
+                                kword.linenum + 1
+                            )
+                            contents = "\n".join(
+                                self.get_lines_between(kword.linenum + 1, end_line)
+                            )
+                            indentation = result_first[1].index(":")
+                            dedented = "\n".join(
+                                [line[indentation:] for line in contents.split("\n")]
+                            )
+                            if dedented.endswith("\n"):
+                                dedented = dedented[:-1]
+
+                            snippet["result"] = dedented
+
+                        break
+
+        results = []
+        for section in sections:
+            name = None
+            content = section["content"]
+            code_result = section.get("result", None)
+            results.append(CodeSnippet(name=name, content=content, result=code_result))
+
+        return results
+

 RawLine = collections.namedtuple("RawLine", ("linenum", "line"))
 Keyword = collections.namedtuple(
@ -159,6 +261,16 @@ Timestamp = collections.namedtuple(
 )


+class DelimiterLineType(Enum):
+    BEGIN_SRC = 1
+    END_SRC = 2
+
+
+DelimiterLine = collections.namedtuple(
+    "DelimiterLine", ("linenum", "line", "delimiter_type")
+)
+
+
 class MarkerType(Enum):
    NO_MODE = 0b0
    BOLD_MODE = 0b1
@ -573,7 +685,7 @@ def parse_contents(raw_contents: List[RawLine]):
                current_block.append(line)
            else:
                # Mark the finishing block as not the last line
-                current_block.append(RawLine(current_line + 1, ''))
+                current_block.append(RawLine(current_line + 1, ""))
                # Split the blocks
                blocks.append(current_block)
                current_line = line.linenum
@ -633,6 +745,7 @@ def parse_headline(hl) -> Headline:
        keywords=hl["keywords"],
        properties=hl["properties"],
        structural=hl["structural"],
+        delimiters=hl["delimiters"],
        title_start=None,
        priority=None,
        priority_start=None,
@ -664,6 +777,10 @@ class OrgDom:
    def getTopHeadlines(self):
        return self.headlines

+    def get_code_snippets(self):
+        for headline in self.headlines:
+            yield from headline.get_code_snippets()
+
    # Writing
    def dump_kw(self, kw):
        options = kw.match.group("options")
@ -711,6 +828,9 @@ class OrgDom:
    def dump_structural(self, structural: Tuple):
        return (structural[0], structural[1])

+    def dump_delimiters(self, line: DelimiterLine):
+        return (line.linenum, line.line)
+
    def dump_headline(self, headline):
        yield "*" * headline.depth + " " + headline.orig.group(
            "spacing"
@ -733,6 +853,9 @@ class OrgDom:
        for struct in headline.structural:
            lines.append((STRUCTURAL_T, self.dump_structural(struct)))

+        for content in headline.delimiters:
+            lines.append((STRUCTURAL_T, self.dump_delimiters(content)))
+
        lines = sorted(lines, key=lambda x: x[1][0])

        structured_lines = []
@ -793,6 +916,7 @@ class OrgDomReader:
        self.keywords: List[Property] = []
        self.headline_hierarchy: List[OrgDom] = []
        self.contents: List[RawLine] = []
+        self.delimiters: List[DelimiterLine] = []

    def finalize(self):
        return OrgDom(self.headlines, self.keywords, self.contents)
@ -811,9 +935,10 @@ class OrgDomReader:
            "children": [],
            "keywords": [],
            "properties": [],
-            "results": [],  # TODO: Move to each specific code block
            "logbook": [],
            "structural": [],
+            "delimiters": [],
+            "results": [],  # TODO: Move to each specific code block?
        }

        while (depth - 2) > len(self.headline_hierarchy):
@ -850,18 +975,18 @@ class OrgDomReader:
            self.headline_hierarchy[-1]["contents"].append(raw)

    def add_begin_src_line(self, linenum: int, match: re.Match) -> int:
-        raw = RawLine(linenum, match.group(0))
+        line = DelimiterLine(linenum, match.group(0), DelimiterLineType.BEGIN_SRC)
        if len(self.headline_hierarchy) == 0:
-            self.contents.append(raw)
+            self.delimiters.append(line)
        else:
-            self.headline_hierarchy[-1]["contents"].append(raw)
+            self.headline_hierarchy[-1]["delimiters"].append(line)

    def add_end_src_line(self, linenum: int, match: re.Match) -> int:
-        raw = RawLine(linenum, match.group(0))
+        line = DelimiterLine(linenum, match.group(0), DelimiterLineType.END_SRC)
        if len(self.headline_hierarchy) == 0:
-            self.contents.append(raw)
+            self.delimiters.append(line)
        else:
-            self.headline_hierarchy[-1]["contents"].append(raw)
+            self.headline_hierarchy[-1]["delimiters"].append(line)

    def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
        self.current_drawer = self.headline_hierarchy[-1]["properties"]
@ -908,6 +1033,12 @@ class OrgDomReader:
                    self.add_raw_line(linenum, line)
                elif m := HEADLINE_RE.match(line):
                    self.add_headline(linenum, m)
+                # Org-babel
+                elif m := BEGIN_SRC_RE.match(line):
+                    self.add_begin_src_line(linenum, m)
+                elif m := END_SRC_RE.match(line):
+                    self.add_end_src_line(linenum, m)
+                # Generic properties
                elif m := KEYWORDS_RE.match(line):
                    self.add_keyword_line(linenum, m)
                elif m := PROPERTY_DRAWER_RE.match(line):
@ -920,11 +1051,6 @@ class OrgDomReader:
                    self.add_results_drawer_line(linenum, line, m)
                elif m := NODE_PROPERTIES_RE.match(line):
                    self.add_node_properties_line(linenum, m)
-                # Org-babel
-                elif m := BEGIN_SRC_RE.match(line):
-                    self.add_begin_src_line(linenum, m)
-                elif m := END_SRC_RE.match(line):
-                    self.add_end_src_line(linenum, m)
                # Not captured
                else:
                    self.add_raw_line(linenum, line)
--- a/tests/04-code.org
+++ b/tests/04-code.org
@ -9,13 +9,15 @@
 :CREATED:  [2020-01-01 Wed 01:01]
 :END:

-#+BEGIN_SRC shell
+#+BEGIN_SRC shell :results verbatim
 echo "This is a test"
+echo "with two lines"
 exit 0 # Exit successfully
 #+END_SRC

 #+RESULTS:
 : This is a test
+: with two lines

 * Second item
    :PROPERTIES:
@ -25,10 +27,12 @@ exit 0 # Exit successfully

    #+BEGIN_SRC shell :results drawer
 echo "This is another test"
+echo "with two lines too"
 exit 0 # Comment
    #+END_SRC

    #+RESULTS:
    :results:
    This is another test
+    with two lines too
    :end:
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@ -260,14 +260,21 @@ class TestSerde(unittest.TestCase):
        self.assertEqual(len(snippets), 2)
        self.assertEqual(
            snippets[0].content,
-            'echo "This is a test"\n' + "exit 0 # Exit successfully",
+            'echo "This is a test"\n'
+            + 'echo "with two lines"\n'
+            + "exit 0 # Exit successfully",
        )
        self.assertEqual(
            snippets[0].result,
-            "This is a test",
+            "This is a test\n" + "with two lines",
        )

        self.assertEqual(
-            snippets[1].content, 'echo "This is another test"\n' + "exit 0 # Comment"
+            snippets[1].content,
+            'echo "This is another test"\n'
+            + 'echo "with two lines too"\n'
+            + "exit 0 # Comment",
+        )
+        self.assertEqual(
+            snippets[1].result, "This is another test\n" + "with two lines too"
        )
-        self.assertEqual(snippets[1].result, "This is another test")