From 8832cd0b3d4e7646ed9d9c277c2c11b976f3b4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Thu, 17 Nov 2022 00:20:20 +0100 Subject: [PATCH] Fix extraction of block element contents. Don't confound normal characters with formatting markers. Handle escaping of otherwise headline starters with a comma. --- org_rw/org_rw.py | 32 +++++++++++++++++++++++--------- tests/04-code.org | 13 +++++++++++++ tests/test_org.py | 10 +++++++++- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 4fbfdb3..e3534a8 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -154,6 +154,26 @@ class RangeInRaw: contents.insert(start_idx + i + 1, element) +def unescape_block_lines(lines: str) -> str: + """ + Remove leading ',' from block_lines if they escape `*` characters. + """ + i = 0 + lines = lines.split('\n') + while i < len(lines): + line = lines[i] + if (line.lstrip(' ').startswith(',') + and line.lstrip(' ,').startswith('*') + ): + # Remove leading ',' + lead_pos = line.index(',') + line = line[:lead_pos] + line[lead_pos + 1:] + lines[i] = line + + i += 1 + + return '\n'.join(lines) + def get_links_from_content(content): in_link = False in_description = False @@ -356,7 +376,7 @@ class Headline: end = line.linenum lines = self.get_lines_between(start + 1, end) - contents = "\n".join(lines) + contents = unescape_block_lines("\n".join(lines)) if contents.endswith("\n"): # This is not ideal, but to avoid having to do this maybe # the content parsing must be re-thinked @@ -708,13 +728,7 @@ class Headline: def get_lines_between(self, start, end): for line in self.contents: if start <= line.linenum < end: - text = [] - for item in line.contents: - if isinstance(item, str): - text.append(item) - elif isinstance(item, MarkerType): - text.append(ModeToMarker[item]) - yield "".join(text) + yield "".join(line.get_raw()) def get_contents(self, format): if format == "raw": @@ -776,7 +790,7 @@ class Headline: start, end = line_start, delimiter.linenum lines = self.get_lines_between(start + 1, end) - contents = "\n".join(lines) + contents = unescape_block_lines("\n".join(lines)) if contents.endswith("\n"): # This is not ideal, but to avoid having to do this maybe # the content parsing must be re-thinked diff --git a/tests/04-code.org b/tests/04-code.org index 161dc2f..956d961 100644 --- a/tests/04-code.org +++ b/tests/04-code.org @@ -36,3 +36,16 @@ exit 0 # Comment This is another test with two lines too :end: + +* Escaped code + :PROPERTIES: + :ID: 04-code-escaped-code-id + :CREATED: [2020-01-01 Wed 01:01] + :END: + + #+BEGIN_SRC c :results drawer +/* This code has to be escaped to + ,* avoid confusion with new headlines. + ,*/ +main(){} + #+END_SRC diff --git a/tests/test_org.py b/tests/test_org.py index da980c5..8afd6a0 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -433,7 +433,7 @@ class TestSerde(unittest.TestCase): doc = load(f) snippets = list(doc.get_code_snippets()) - self.assertEqual(len(snippets), 2) + self.assertEqual(len(snippets), 3) self.assertEqual( snippets[0].content, 'echo "This is a test"\n' @@ -456,6 +456,14 @@ class TestSerde(unittest.TestCase): snippets[1].result, "This is another test\n" + "with two lines too" ) + self.assertEqual( + snippets[2].content, + '/* This code has to be escaped to\n' + + ' * avoid confusion with new headlines.\n' + + ' */\n' + + 'main(){}', + ) + def test_mimic_write_file_05(self): with open(os.path.join(DIR, "05-dates.org")) as f: orig = f.read()