From ffe6f007fc5904d041a35b97b8a1b907b5400964 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Sun, 6 Dec 2020 00:26:44 +0100
Subject: [PATCH] Add basic BEGIN_SRC block support.

---
 org_dom/org_dom.py | 160 ++++++++++++++++++++++++++++++++++++++++-----
 tests/04-code.org  |  10 ++-
 tests/test_dom.py  |  15 +++--
 3 files changed, 161 insertions(+), 24 deletions(-)
diff --git a/org_dom/org_dom.py b/org_dom/org_dom.py
index 500d078..baa42d1 100644
--- a/org_dom/org_dom.py
+++ b/org_dom/org_dom.py
@@ -63,10 +63,10 @@ ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
 INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))
 
 # Org-Babel
-BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(\s+(?P<content>.*))?$")
-END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$")
-RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$")
-
+BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(?P<content>.*)$", re.I)
+END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$", re.I)
+RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I)
+CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result"))
 
 # BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
 #                       r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
@@ -122,6 +122,7 @@ class Headline:
         contents,
         children,
         structural,
+        delimiters,
     ):
         self.start_line = start_line
         self.depth = depth
@@ -137,11 +138,112 @@ class Headline:
         self.contents = contents
         self.children = children
         self.structural = structural
+        self.delimiters = delimiters
 
     def get_links(self):
         for content in self.contents:
             yield from get_links_from_content(content)
 
+    def get_lines_between(self, start, end):
+        """Yield the joined contents of each line with start <= linenum < end."""
+        selected = (ln for ln in self.contents if start <= ln.linenum < end)
+        yield from ("".join(ln.contents) for ln in selected)
+
+    def get_element_in_line(self, linenum):
+        """Find the content line or ("structural", text) pair located at `linenum`."""
+        found = next((ln for ln in self.contents if linenum == ln.linenum), None)
+        if found is not None:
+            return found
+        for (s_lnum, struc) in self.structural:
+            if linenum == s_lnum:
+                return ("structural", struc)
+
+    def get_structural_end_after(self, linenum):
+        """Return the first `:END:` (linenum, text) entry after `linenum`, or None."""
+        after = ((n, t) for (n, t) in self.structural if n > linenum)
+        return next((e for e in after if e[1].strip().upper() == ":END:"), None)
+
+    def get_code_snippets(self):
+        """Return a CodeSnippet for every BEGIN_SRC/END_SRC pair of this headline.
+
+        Each `#+RESULTS:` keyword is attached to the closest snippet ending
+        before it; snippets without results get `result=None`.
+        """
+        sections = []
+        line_start = None  # Line of the currently open BEGIN_SRC, if any
+
+        for delimiter in self.delimiters:
+            if delimiter.delimiter_type == DelimiterLineType.BEGIN_SRC:
+                line_start = delimiter.linenum
+            elif delimiter.delimiter_type == DelimiterLineType.END_SRC:
+                if line_start is None:
+                    continue  # Stray END_SRC, there is no block to close
+                start, end = line_start, delimiter.linenum
+                line_start = None
+
+                lines = self.get_lines_between(start + 1, end)
+                contents = "\n".join(lines)
+                if contents.endswith("\n"):
+                    # This is not ideal, but to avoid having to do this maybe
+                    # the content parsing must be re-thought
+                    contents = contents[:-1]
+
+                sections.append(
+                    {
+                        "line_first": start + 1,
+                        "line_last": end - 1,
+                        "content": contents,
+                    }
+                )
+
+        for kword in self.keywords:
+            if kword.key.upper() != "RESULTS":
+                continue
+            # Walk backwards so the result lands on the nearest preceding block
+            for snippet in reversed(sections):
+                if kword.linenum <= snippet["line_last"]:
+                    continue
+                result_first = self.get_element_in_line(kword.linenum + 1)
+                if isinstance(result_first, Text):
+                    result = "\n".join(result_first.contents)
+                    snippet["result"] = result
+
+                    if result.strip().startswith(": "):
+                        # Keep only the leading ': ' lines, without the marker
+                        s_result = []
+                        for line in result.split("\n"):
+                            stripped = line.lstrip()
+                            if not stripped.startswith(": "):
+                                break
+                            s_result.append(stripped[2:])
+                        snippet["result"] = "\n".join(s_result)
+                elif (
+                    isinstance(result_first, tuple)
+                    and len(result_first) == 2
+                    and result_first[0] == "structural"
+                    and result_first[1].strip().upper() == ":RESULTS:"
+                ):
+                    drawer_end = self.get_structural_end_after(kword.linenum + 1)
+                    if drawer_end is None:
+                        break  # Unterminated drawer, nothing reliable to read
+                    contents = "\n".join(
+                        self.get_lines_between(kword.linenum + 1, drawer_end[0])
+                    )
+                    indentation = result_first[1].index(":")
+                    dedented = "\n".join(
+                        [line[indentation:] for line in contents.split("\n")]
+                    )
+                    if dedented.endswith("\n"):
+                        dedented = dedented[:-1]
+                    snippet["result"] = dedented
+
+                break
+
+        return [
+            CodeSnippet(name=None, content=s["content"], result=s.get("result"))
+            for s in sections
+        ]
+
 
 RawLine = collections.namedtuple("RawLine", ("linenum", "line"))
 Keyword = collections.namedtuple(
@@ -159,6 +261,16 @@ Timestamp = collections.namedtuple(
 )
 
 
+class DelimiterLineType(Enum):
+    BEGIN_SRC = 1  # Line matched by BEGIN_SRC_RE (opens a source block)
+    END_SRC = 2  # Line matched by END_SRC_RE (closes a source block)
+
+
+DelimiterLine = collections.namedtuple(
+    "DelimiterLine", ("linenum", "line", "delimiter_type")
+)  # linenum: line number, line: matched text, delimiter_type: DelimiterLineType
+
+
 class MarkerType(Enum):
     NO_MODE = 0b0
     BOLD_MODE = 0b1
@@ -573,7 +685,7 @@ def parse_contents(raw_contents: List[RawLine]):
                 current_block.append(line)
             else:
                 # Mark the finishing block as not the last line
-                current_block.append(RawLine(current_line + 1, ''))
+                current_block.append(RawLine(current_line + 1, ""))
                 # Split the blocks
                 blocks.append(current_block)
                 current_line = line.linenum
@@ -633,6 +745,7 @@ def parse_headline(hl) -> Headline:
         keywords=hl["keywords"],
         properties=hl["properties"],
         structural=hl["structural"],
+        delimiters=hl["delimiters"],
         title_start=None,
         priority=None,
         priority_start=None,
@@ -664,6 +777,10 @@ class OrgDom:
     def getTopHeadlines(self):
         return self.headlines
 
+    def get_code_snippets(self):
+        """Yield the code snippets of each headline in `self.headlines`."""
+        yield from (s for hl in self.headlines for s in hl.get_code_snippets())
+
     # Writing
     def dump_kw(self, kw):
         options = kw.match.group("options")
@@ -711,6 +828,9 @@ class OrgDom:
     def dump_structural(self, structural: Tuple):
         return (structural[0], structural[1])
 
+    def dump_delimiters(self, line: DelimiterLine):
+        return (line.linenum, line.line)  # (line number, raw text) pair
+
     def dump_headline(self, headline):
         yield "*" * headline.depth + " " + headline.orig.group(
             "spacing"
@@ -733,6 +853,9 @@ class OrgDom:
         for struct in headline.structural:
             lines.append((STRUCTURAL_T, self.dump_structural(struct)))
 
+        for content in headline.delimiters:
+            lines.append((STRUCTURAL_T, self.dump_delimiters(content)))
+
         lines = sorted(lines, key=lambda x: x[1][0])
 
         structured_lines = []
@@ -793,6 +916,7 @@ class OrgDomReader:
         self.keywords: List[Property] = []
         self.headline_hierarchy: List[OrgDom] = []
         self.contents: List[RawLine] = []
+        self.delimiters: List[DelimiterLine] = []
 
     def finalize(self):
         return OrgDom(self.headlines, self.keywords, self.contents)
@@ -811,9 +935,10 @@ class OrgDomReader:
             "children": [],
             "keywords": [],
             "properties": [],
-            "results": [],  # TODO: Move to each specific code block
             "logbook": [],
             "structural": [],
+            "delimiters": [],
+            "results": [],  # TODO: Move to each specific code block?
         }
 
         while (depth - 2) > len(self.headline_hierarchy):
@@ -850,18 +975,18 @@ class OrgDomReader:
             self.headline_hierarchy[-1]["contents"].append(raw)
 
     def add_begin_src_line(self, linenum: int, match: re.Match) -> int:
-        raw = RawLine(linenum, match.group(0))
+        line = DelimiterLine(linenum, match.group(0), DelimiterLineType.BEGIN_SRC)
         if len(self.headline_hierarchy) == 0:
-            self.contents.append(raw)
+            self.delimiters.append(line)  # NOTE(review): finalize() drops these - verify
         else:
-            self.headline_hierarchy[-1]["contents"].append(raw)
+            self.headline_hierarchy[-1]["delimiters"].append(line)
 
     def add_end_src_line(self, linenum: int, match: re.Match) -> int:
-        raw = RawLine(linenum, match.group(0))
+        line = DelimiterLine(linenum, match.group(0), DelimiterLineType.END_SRC)
         if len(self.headline_hierarchy) == 0:
-            self.contents.append(raw)
+            self.delimiters.append(line)  # NOTE(review): finalize() drops these - verify
         else:
-            self.headline_hierarchy[-1]["contents"].append(raw)
+            self.headline_hierarchy[-1]["delimiters"].append(line)
 
     def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
         self.current_drawer = self.headline_hierarchy[-1]["properties"]
@@ -908,6 +1033,12 @@ class OrgDomReader:
                     self.add_raw_line(linenum, line)
                 elif m := HEADLINE_RE.match(line):
                     self.add_headline(linenum, m)
+                # Org-babel
+                elif m := BEGIN_SRC_RE.match(line):
+                    self.add_begin_src_line(linenum, m)
+                elif m := END_SRC_RE.match(line):
+                    self.add_end_src_line(linenum, m)
+                # Generic properties
                 elif m := KEYWORDS_RE.match(line):
                     self.add_keyword_line(linenum, m)
                 elif m := PROPERTY_DRAWER_RE.match(line):
@@ -920,11 +1051,6 @@ class OrgDomReader:
                     self.add_results_drawer_line(linenum, line, m)
                 elif m := NODE_PROPERTIES_RE.match(line):
                     self.add_node_properties_line(linenum, m)
-                # Org-babel
-                elif m := BEGIN_SRC_RE.match(line):
-                    self.add_begin_src_line(linenum, m)
-                elif m := END_SRC_RE.match(line):
-                    self.add_end_src_line(linenum, m)
                 # Not captured
                 else:
                     self.add_raw_line(linenum, line)
diff --git a/tests/04-code.org b/tests/04-code.org
index 35f4f78..161dc2f 100644
--- a/tests/04-code.org
+++ b/tests/04-code.org
@@ -9,13 +9,15 @@
 :CREATED:  [2020-01-01 Wed 01:01]
 :END:
 
-#+BEGIN_SRC shell
+#+BEGIN_SRC shell :results verbatim
 echo "This is a test"
+echo "with two lines"
 exit 0 # Exit successfully
 #+END_SRC
 
 #+RESULTS:
 : This is a test
+: with two lines
 
 * Second item
     :PROPERTIES:
@@ -24,11 +26,13 @@ exit 0 # Exit successfully
     :END:
 
     #+BEGIN_SRC shell :results drawer
-      echo "This is another test"
-      exit 0 # Comment
+echo "This is another test"
+echo "with two lines too"
+exit 0 # Comment
     #+END_SRC
 
     #+RESULTS:
     :results:
     This is another test
+    with two lines too
     :end:
diff --git a/tests/test_dom.py b/tests/test_dom.py
index 610787b..7dc23ee 100644
--- a/tests/test_dom.py
+++ b/tests/test_dom.py
@@ -260,14 +260,21 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(len(snippets), 2)
         self.assertEqual(
             snippets[0].content,
-            'echo "This is a test"\n' + "exit 0 # Exit successfully",
+            'echo "This is a test"\n'
+            + 'echo "with two lines"\n'
+            + "exit 0 # Exit successfully",
         )
         self.assertEqual(
             snippets[0].result,
-            "This is a test",
+            "This is a test\n" + "with two lines",
         )
 
         self.assertEqual(
-            snippets[1].content, 'echo "This is another test"\n' + "exit 0 # Comment"
+            snippets[1].content,
+            'echo "This is another test"\n'
+            + 'echo "with two lines too"\n'
+            + "exit 0 # Comment",
+        )
+        self.assertEqual(
+            snippets[1].result, "This is another test\n" + "with two lines too"
         )
-        self.assertEqual(snippets[1].result, "This is another test")