From 8832cd0b3d4e7646ed9d9c277c2c11b976f3b4b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Thu, 17 Nov 2022 00:20:20 +0100
Subject: [PATCH] Fix extraction of block element contents.

Don't confound normal characters with formatting markers. Handle the comma
escaping of block lines that would otherwise be parsed as headlines.
---
 org_rw/org_rw.py  | 32 +++++++++++++++++++++++---------
 tests/04-code.org | 13 +++++++++++++
 tests/test_org.py | 10 +++++++++-
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index 4fbfdb3..e3534a8 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -154,6 +154,26 @@ class RangeInRaw:
             contents.insert(start_idx + i + 1, element)
 
 
+def unescape_block_lines(lines: str) -> str:
+    """
+    Remove the escaping ',' from block lines whose first non-space text is
+    one or more ',' immediately followed by `*`.
+
+    Only one ',' is removed per line, and lines such as `, *args` (with a
+    space between ',' and `*`) are not escapes, so they are left untouched.
+    """
+    unescaped = []
+    for line in lines.split('\n'):
+        indent_len = len(line) - len(line.lstrip(' '))
+        text = line[indent_len:]
+        # An escape is a run of ',' directly followed by '*'.
+        if text.startswith(',') and text.lstrip(',').startswith('*'):
+            # Drop only the first ',' so `,,*` still unescapes to `,*`.
+            line = line[:indent_len] + text[1:]
+        unescaped.append(line)
+
+    return '\n'.join(unescaped)
+
 def get_links_from_content(content):
     in_link = False
     in_description = False
@@ -356,7 +376,7 @@ class Headline:
                     end = line.linenum
 
                     lines = self.get_lines_between(start + 1, end)
-                    contents = "\n".join(lines)
+                    contents = unescape_block_lines("\n".join(lines))
                     if contents.endswith("\n"):
                         # This is not ideal, but to avoid having to do this maybe
                         # the content parsing must be re-thinked
@@ -708,13 +728,7 @@ class Headline:
     def get_lines_between(self, start, end):
         for line in self.contents:
             if start <= line.linenum < end:
-                text = []
-                for item in line.contents:
-                    if isinstance(item, str):
-                        text.append(item)
-                    elif isinstance(item, MarkerType):
-                        text.append(ModeToMarker[item])
-                yield "".join(text)
+                yield "".join(line.get_raw())
 
     def get_contents(self, format):
         if format == "raw":
@@ -776,7 +790,7 @@ class Headline:
                 start, end = line_start, delimiter.linenum
 
                 lines = self.get_lines_between(start + 1, end)
-                contents = "\n".join(lines)
+                contents = unescape_block_lines("\n".join(lines))
                 if contents.endswith("\n"):
                     # This is not ideal, but to avoid having to do this maybe
                     # the content parsing must be re-thinked
diff --git a/tests/04-code.org b/tests/04-code.org
index 161dc2f..956d961 100644
--- a/tests/04-code.org
+++ b/tests/04-code.org
@@ -36,3 +36,16 @@ exit 0 # Comment
     This is another test
     with two lines too
     :end:
+
+* Escaped code
+    :PROPERTIES:
+    :ID:       04-code-escaped-code-id
+    :CREATED:  [2020-01-01 Wed 01:01]
+    :END:
+
+    #+BEGIN_SRC c :results drawer
+/* This code has to be escaped to
+ ,* avoid confusion with new headlines.
+ ,*/
+main(){}
+    #+END_SRC
diff --git a/tests/test_org.py b/tests/test_org.py
index da980c5..8afd6a0 100644
--- a/tests/test_org.py
+++ b/tests/test_org.py
@@ -433,7 +433,7 @@ class TestSerde(unittest.TestCase):
             doc = load(f)
 
         snippets = list(doc.get_code_snippets())
-        self.assertEqual(len(snippets), 2)
+        self.assertEqual(len(snippets), 3)
         self.assertEqual(
             snippets[0].content,
             'echo "This is a test"\n'
@@ -456,6 +456,14 @@ class TestSerde(unittest.TestCase):
             snippets[1].result, "This is another test\n" + "with two lines too"
         )
 
+        self.assertEqual(
+            snippets[2].content,
+            '/* This code has to be escaped to\n'
+            + ' * avoid confusion with new headlines.\n'
+            + ' */\n'
+            + 'main(){}',
+        )
+
     def test_mimic_write_file_05(self):
         with open(os.path.join(DIR, "05-dates.org")) as f:
             orig = f.read()