From bf10c51e6194f61b2ec21ed683f768d04f0da2cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Thu, 3 Nov 2022 23:35:56 +0100 Subject: [PATCH 01/14] Indent list-group inside current node if part of a block. --- org_rw/org_rw.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 6cbd04b..ffd6967 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -395,9 +395,12 @@ class Headline: or isinstance(current_node, dom.BlockNode) or isinstance(current_node, dom.DrawerNode) ): + was_node = current_node current_node = dom.ListGroupNode() - if current_node is None: + if was_node is None: tree.append(current_node) + else: + was_node.append(current_node) indentation_tree.append(current_node) if not isinstance(current_node, dom.ListGroupNode): if not isinstance(current_node, dom.ListGroupNode): From ccebe90ea84afa3847a9e3f409922146312344f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Fri, 4 Nov 2022 00:14:13 +0100 Subject: [PATCH 02/14] Fix as_dom of Text found in a result after a list. --- org_rw/org_rw.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index ffd6967..29f2998 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -376,18 +376,32 @@ class Headline: current_node.append(dom.PropertyNode(line.key, line.value)) elif isinstance(line, Text): - if isinstance(current_node, dom.BlockNode): - current_node.append(dom.Text(line)) - elif isinstance(current_node, dom.DrawerNode): - current_node.append(dom.Text(line)) + tree_up = list(indentation_tree) + while len(tree_up) > 0: + node = tree_up[-1] + if (isinstance(node, dom.BlockNode) + or isinstance(node, dom.DrawerNode) + ): + node.append(dom.Text(line)) + current_node = node + contents = [] + break + elif ((not isinstance(node, dom.TableNode)) and + (type(node) not in NON_FINISHED_GROUPS) + ): + raise NotImplementedError('Not implemented node type: {} (headline_id={}, line={}, doc={})'.format( + node, + self.id, + line.linenum, + self.doc.path, + )) + else: + tree_up.pop(-1) else: - if isinstance(current_node, dom.TableNode): - pass # No problem here - elif type(current_node) not in NON_FINISHED_GROUPS: - raise NotImplementedError('Not implemented node type: {}'.format(current_node)) current_node = None contents = [] tree.append(dom.Text(text_to_dom(line.contents, line))) + indentation_tree = tree_up elif isinstance(line, ListItem): if (current_node is None @@ -434,7 +448,7 @@ class Headline: ) > len(line.indentation) ): - rem = indentation_tree.pop() + rem = indentation_tree.pop(-1) if len(indentation_tree) == 0: indentation_tree.append(rem) current_node = rem @@ -515,8 +529,6 @@ class Headline: tree_up.pop(-1) else: raise Exception('Unexpected node ({}) on headline (id={}), line {}'.format(current_node, self.id, linenum)) - if self.id == 'd07fcf27-d6fc-41e3-a9d0-b2e2902aec23': - print("Found node:", current_node) current_node = None elif content.strip().upper() == ":RESULTS:": assert current_node is None From 6c6c375572a99aeb18fd46bba97261773d734d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 12 Nov 2022 12:37:50 +0100 Subject: [PATCH 03/14] WIP: Add support for multiline list items. Right now this messes with line numbers. --- org_rw/org_rw.py | 131 +++++++++++++++++++++++++++++++-------------- tests/06-lists.org | 12 +++++ tests/test_org.py | 11 ++++ 3 files changed, 114 insertions(+), 40 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 29f2998..afba2f3 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -549,14 +549,26 @@ class Headline: last_line = None for li in self.list_items: - if last_line == li.linenum - 1: - lists[-1].append(li) - else: + if last_line is None: lists.append([li]) + else: + num_lines = li.linenum - (last_line + 1) + lines_between = ''.join(['\n' + l + for l in self.get_lines_between(last_line + 1, li.linenum)] + ) - last_line = li.linenum + # Only empty lines + if ((num_lines == lines_between.count('\n')) + and (len(lines_between.strip()) == 0) + ): + lists[-1].append(li) + else: + lists.append([li]) + + last_line = li.linenum + sum(c.count('\n') for c in li.content) return lists + # @DEPRECATED: use `get_lists` def getLists(self): return self.get_lists() @@ -838,22 +850,30 @@ Property = collections.namedtuple( "Property", ("linenum", "match", "key", "value", "options") ) -ListItem = collections.namedtuple( - "ListItem", - ( - "linenum", - "match", - "indentation", - "bullet", - "counter", - "counter_sep", - "checkbox_indentation", - "checkbox_value", - "tag_indentation", - "tag", - "content", - ), -) +class ListItem: + def __init__(self, + linenum, match, + indentation, + bullet, counter, counter_sep, + checkbox_indentation, checkbox_value, + tag_indentation, tag, + content, + ): + self.linenum = linenum + self.match = match + self.indentation = indentation + self.bullet = bullet + self.counter = counter + self.counter_sep = counter_sep + self.checkbox_indentation = checkbox_indentation + self.checkbox_value = checkbox_value + self.tag_indentation = tag_indentation + self.tag = tag + self.content = content + + def append_line(self, line): + self.content += parse_content_block('\n' + line[len(self.indentation):]).contents + TableRow = collections.namedtuple( "TableRow", ( @@ -1555,14 +1575,20 @@ def parse_contents(raw_contents: List[RawLine]): return [parse_content_block(block) for block in blocks] -def parse_content_block(raw_contents: List[RawLine]): +def parse_content_block(raw_contents: Union[List[RawLine],str]): contents_buff = [] - for line in raw_contents: - contents_buff.append(line.line) + if isinstance(raw_contents, str): + contents_buff.append(raw_contents) + else: + for line in raw_contents: + contents_buff.append(line.line) contents = "\n".join(contents_buff) tokens = tokenize_contents(contents) - current_line = raw_contents[0].linenum + if isinstance(raw_contents, str): + current_line = None + else: + current_line = raw_contents[0].linenum contents = [] # Use tokens to tag chunks of text with it's container type @@ -1589,7 +1615,12 @@ def dump_contents(raw): elif isinstance(raw, ListItem): bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep - content = token_list_to_raw(raw.content) + content_full = token_list_to_raw(raw.content) + content_lines = content_full.split('\n') + content = '\n'.join([content_lines[0], *[ + raw.indentation + line + for line in content_lines[1:] + ]]) checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" return ( @@ -1978,19 +2009,19 @@ class OrgDocReader: def add_list_item_line(self, linenum: int, match: re.Match) -> int: li = ListItem( - linenum, - match, - match.group("indentation"), - match.group("bullet"), - match.group("counter"), - match.group("counter_sep"), - match.group("checkbox_indentation"), - match.group("checkbox_value"), - match.group("tag_indentation"), - parse_content_block( + linenum=linenum, + match=match, + indentation=match.group("indentation"), + bullet=match.group("bullet"), + counter=match.group("counter"), + counter_sep=match.group("counter_sep"), + checkbox_indentation=match.group("checkbox_indentation"), + checkbox_value=match.group("checkbox_value"), + tag_indentation=match.group("tag_indentation"), + tag=parse_content_block( [RawLine(linenum=linenum, line=match.group("tag"))] ).contents if match.group("tag") else None, - parse_content_block( + content=parse_content_block( [RawLine(linenum=linenum, line=match.group("content"))] ).contents, ) @@ -1999,6 +2030,7 @@ class OrgDocReader: self.list_items.append(li) else: self.headline_hierarchy[-1]["list_items"].append(li) + return li def add_table_line(self, linenum: int, line: str) -> int: chunks = line.split('|') @@ -2108,6 +2140,22 @@ class OrgDocReader: reader = enumerate(lines) in_drawer = False in_block = False + list_item_indentation = None + list_item = None + + def add_raw_line_with_possible_indentation(linenum, line): + added = False + nonlocal list_item + nonlocal list_item_indentation + if list_item: + if line.startswith(list_item_indentation): + list_item.append_line(line) + added = True + elif len(line.strip()) > 0: + list_item = None + list_item_indentation = None + if not added: + self.add_raw_line(linenum, line) for lnum, line in reader: linenum = lnum + 1 @@ -2117,14 +2165,17 @@ class OrgDocReader: self.add_end_block_line(linenum, m) in_block = False else: - self.add_raw_line(linenum, line) + add_raw_line_with_possible_indentation(linenum, line) elif m := HEADLINE_RE.match(line): + list_item_indentation = None + list_item = None self.add_headline(linenum, m) elif m := LIST_ITEM_RE.match(line): - self.add_list_item_line(linenum, m) + list_item = self.add_list_item_line(linenum, m) + list_item_indentation = m.group("indentation") elif m := RAW_LINE_RE.match(line): - self.add_raw_line(linenum, line) + add_raw_line_with_possible_indentation(linenum, line) # Org-babel elif m := BEGIN_BLOCK_RE.match(line): self.add_begin_block_line(linenum, m) @@ -2150,7 +2201,7 @@ class OrgDocReader: self.add_table_line(linenum, line) # Not captured else: - self.add_raw_line(linenum, line) + add_raw_line_with_possible_indentation(linenum, line) except: logging.error("Error line {}: {}".format(linenum + 1, line)) raise diff --git a/tests/06-lists.org b/tests/06-lists.org index b80e358..af4b056 100644 --- a/tests/06-lists.org +++ b/tests/06-lists.org @@ -51,3 +51,15 @@ Also with markup - _Key_ :: _Value_ - /Key/ 2 :: /Value/ 2 + +* List with multiline elements + :PROPERTIES: + :ID: 07-list-with-multiline-elements + :CREATED: [2020-01-01 Wed 01:01] + :END: + + - This is a list item... + that spans multiple lines + + - This is another list item... + that has content on multiple lines diff --git a/tests/test_org.py b/tests/test_org.py index 3509ffc..1f642cb 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -579,6 +579,17 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[1][1].counter, "2") self.assertEqual(lists2[1][1].counter_sep, ")") + hl4 = doc.getTopHeadlines()[3] + # ... + lists4 = hl4.getLists() + print(lists4) + self.assertEqual(len(lists4), 1) + + self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines"]) + self.assertEqual(lists4[0][0].bullet, "-") + self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines"]) + self.assertEqual(lists4[0][1].bullet, "-") + def test_org_roam_07(self): with open(os.path.join(DIR, "07-org-roam-v2.org")) as f: orig = f.read() From b81990445729b61e6b3f8c9947351e52dae12962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 12 Nov 2022 18:01:00 +0100 Subject: [PATCH 04/14] Fix handling of block elements after multiline list items. --- org_rw/org_rw.py | 32 +++++++++++++++++++++++++------- tests/06-lists.org | 10 ++++++++++ tests/test_org.py | 15 +++++++++------ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index afba2f3..0021acd 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -871,8 +871,12 @@ class ListItem: self.tag = tag self.content = content + @property + def text_start_pos(self): + return len(self.indentation) + 1 # Indentation + bullet + def append_line(self, line): - self.content += parse_content_block('\n' + line[len(self.indentation):]).contents + self.content += parse_content_block('\n' + line).contents TableRow = collections.namedtuple( "TableRow", @@ -1617,10 +1621,7 @@ def dump_contents(raw): bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep content_full = token_list_to_raw(raw.content) content_lines = content_full.split('\n') - content = '\n'.join([content_lines[0], *[ - raw.indentation + line - for line in content_lines[1:] - ]]) + content = '\n'.join(content_lines) checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" return ( @@ -2148,12 +2149,15 @@ class OrgDocReader: nonlocal list_item nonlocal list_item_indentation if list_item: - if line.startswith(list_item_indentation): + if ((line[:list_item.text_start_pos].strip() == '') + or (len(line.strip()) == 0) + ): list_item.append_line(line) added = True - elif len(line.strip()) > 0: + else: list_item = None list_item_indentation = None + if not added: self.add_raw_line(linenum, line) @@ -2164,6 +2168,8 @@ class OrgDocReader: if m := END_BLOCK_RE.match(line): self.add_end_block_line(linenum, m) in_block = False + list_item_indentation = None + list_item = None else: add_raw_line_with_possible_indentation(linenum, line) @@ -2180,25 +2186,37 @@ class OrgDocReader: elif m := BEGIN_BLOCK_RE.match(line): self.add_begin_block_line(linenum, m) in_block = True + list_item_indentation = None + list_item = None elif m := END_BLOCK_RE.match(line): self.add_end_block_line(linenum, m) in_block = False + list_item_indentation = None + list_item = None # Generic properties elif m := KEYWORDS_RE.match(line): self.add_keyword_line(linenum, m) elif m := DRAWER_END_RE.match(line): self.add_drawer_end_line(linenum, line, m) in_drawer = False + list_item_indentation = None + list_item = None elif (not in_drawer) and (m := DRAWER_START_RE.match(line)): self.add_property_drawer_line(linenum, line, m) in_drawer = True + list_item_indentation = None + list_item = None elif (not in_drawer) and (m := RESULTS_DRAWER_RE.match(line)): self.add_results_drawer_line(linenum, line, m) in_drawer = True + list_item_indentation = None + list_item = None elif m := NODE_PROPERTIES_RE.match(line): self.add_node_properties_line(linenum, m) elif line.strip().startswith('|'): self.add_table_line(linenum, line) + list_item_indentation = None + list_item = None # Not captured else: add_raw_line_with_possible_indentation(linenum, line) diff --git a/tests/06-lists.org b/tests/06-lists.org index af4b056..0c5448e 100644 --- a/tests/06-lists.org +++ b/tests/06-lists.org @@ -63,3 +63,13 @@ Also with markup - This is another list item... that has content on multiple lines + + Text after a multiline element + + - This is another + multiline list + + #+begin_quote + With a block element inside + #+end_quote + diff --git a/tests/test_org.py b/tests/test_org.py index 1f642cb..21b6518 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -551,7 +551,7 @@ class TestSerde(unittest.TestCase): MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE), "markup", MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), - ".", + ".", "\n" ], ) @@ -567,7 +567,7 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[0][0].counter, "1") self.assertEqual(lists2[0][0].counter_sep, ".") - self.assertEqual(lists2[0][1].content, ["Second element"]) + self.assertEqual(lists2[0][1].content, ["Second element", "\n"]) self.assertEqual(lists2[0][1].counter, "2") self.assertEqual(lists2[0][1].counter_sep, ".") @@ -575,7 +575,7 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[1][0].counter, "1") self.assertEqual(lists2[1][0].counter_sep, ")") - self.assertEqual(lists2[1][1].content, ["Second element"]) + self.assertEqual(lists2[1][1].content, ["Second element", "\n"]) self.assertEqual(lists2[1][1].counter, "2") self.assertEqual(lists2[1][1].counter_sep, ")") @@ -583,13 +583,16 @@ class TestSerde(unittest.TestCase): # ... lists4 = hl4.getLists() print(lists4) - self.assertEqual(len(lists4), 1) + self.assertEqual(len(lists4), 2) - self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines"]) + self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines", "\n"]) self.assertEqual(lists4[0][0].bullet, "-") - self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines"]) + self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines", "\n"]) self.assertEqual(lists4[0][1].bullet, "-") + self.assertEqual(lists4[1][0].content, ["This is another", "\n multiline list", "\n"]) + self.assertEqual(lists4[1][0].bullet, "-") + def test_org_roam_07(self): with open(os.path.join(DIR, "07-org-roam-v2.org")) as f: orig = f.read() From 66d061dfdaf92c729bfff3eaa67e59b37367eabf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Tue, 15 Nov 2022 21:07:36 +0100 Subject: [PATCH 05/14] Expose `#+BEGIN_` block arguments. --- org_rw/dom.py | 3 ++- org_rw/org_rw.py | 19 ++++++++++++------- tests/test_org.py | 1 + 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/org_rw/dom.py b/org_rw/dom.py index 6b3e455..23ea44b 100644 --- a/org_rw/dom.py +++ b/org_rw/dom.py @@ -89,11 +89,12 @@ class BlockNode: class CodeBlock(BlockNode): - def __init__(self, header, subtype): + def __init__(self, header, subtype, arguments): super().__init__() self.header = header self.lines = None self.subtype = subtype + self.arguments = arguments def set_lines(self, lines): self.lines = lines diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 0021acd..4fbfdb3 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -92,10 +92,10 @@ LIST_ITEM_RE = re.compile( ) # Org-Babel -BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P[^ ]+)(?P.*)$", re.I) +BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P[^ ]+)(?P.*)$", re.I) END_BLOCK_RE = re.compile(r"^\s*#\+END_(?P[^ ]+)\s*$", re.I) RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I) -CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result")) +CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result", "arguments")) # Groupings NON_FINISHED_GROUPS = (type(None), dom.ListGroupNode, dom.ResultsDrawerNode, dom.PropertyDrawerNode) @@ -481,7 +481,7 @@ class Headline: and line.delimiter_type == DelimiterLineType.BEGIN_BLOCK ): assert type(current_node) in NON_FINISHED_GROUPS - current_node = dom.CodeBlock(line, line.type_data.subtype) + current_node = dom.CodeBlock(line, line.type_data.subtype, line.arguments) elif isinstance(line, Keyword): logging.warning("Keywords not implemented on `as_dom()`") @@ -764,11 +764,13 @@ class Headline: inside_code = False sections = [] + arguments = None for delimiter in self.delimiters: if delimiter.delimiter_type == DelimiterLineType.BEGIN_BLOCK and delimiter.type_data.subtype.lower() == "src": line_start = delimiter.linenum inside_code = True + arguments = delimiter.arguments elif delimiter.delimiter_type == DelimiterLineType.END_BLOCK and delimiter.type_data.subtype.lower() == "src": inside_code = False start, end = line_start, delimiter.linenum @@ -785,8 +787,10 @@ class Headline: "line_first": start + 1, "line_last": end - 1, "content": contents, + "arguments": arguments, } ) + arguments = None line_start = None for kword in self.keywords: @@ -837,7 +841,8 @@ class Headline: name = None content = section["content"] code_result = section.get("result", None) - results.append(CodeSnippet(name=name, content=content, result=code_result)) + arguments = section.get("arguments", None) + results.append(CodeSnippet(name=name, content=content, result=code_result, arguments=arguments)) return results @@ -991,7 +996,7 @@ BlockDelimiterTypeData = collections.namedtuple( ) DelimiterLine = collections.namedtuple( - "DelimiterLine", ("linenum", "line", "delimiter_type", "type_data") + "DelimiterLine", ("linenum", "line", "delimiter_type", "type_data", "arguments") ) @@ -2081,7 +2086,7 @@ class OrgDocReader: def add_begin_block_line(self, linenum: int, match: re.Match): line = DelimiterLine(linenum, match.group(0), DelimiterLineType.BEGIN_BLOCK, - BlockDelimiterTypeData(match.group("subtype"))) + BlockDelimiterTypeData(match.group("subtype")), match.group('arguments')) if len(self.headline_hierarchy) == 0: self.delimiters.append(line) else: @@ -2089,7 +2094,7 @@ class OrgDocReader: def add_end_block_line(self, linenum: int, match: re.Match): line = DelimiterLine(linenum, match.group(0), DelimiterLineType.END_BLOCK, - BlockDelimiterTypeData(match.group("subtype"))) + BlockDelimiterTypeData(match.group("subtype")), None) if len(self.headline_hierarchy) == 0: self.delimiters.append(line) else: diff --git a/tests/test_org.py b/tests/test_org.py index 21b6518..da980c5 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -440,6 +440,7 @@ class TestSerde(unittest.TestCase): + 'echo "with two lines"\n' + "exit 0 # Exit successfully", ) + self.assertEqual(snippets[0].arguments.split(), ['shell', ':results', 'verbatim']) self.assertEqual( snippets[0].result, "This is a test\n" + "with two lines", From 8832cd0b3d4e7646ed9d9c277c2c11b976f3b4b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Thu, 17 Nov 2022 00:20:20 +0100 Subject: [PATCH 06/14] Fix extraction of block element contents. Don't confound normal characters with formatting markers. Handle escaping of otherwise headline starters with a comma. --- org_rw/org_rw.py | 32 +++++++++++++++++++++++--------- tests/04-code.org | 13 +++++++++++++ tests/test_org.py | 10 +++++++++- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 4fbfdb3..e3534a8 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -154,6 +154,26 @@ class RangeInRaw: contents.insert(start_idx + i + 1, element) +def unescape_block_lines(lines: str) -> str: + """ + Remove leading ',' from block_lines if they escape `*` characters. + """ + i = 0 + lines = lines.split('\n') + while i < len(lines): + line = lines[i] + if (line.lstrip(' ').startswith(',') + and line.lstrip(' ,').startswith('*') + ): + # Remove leading ',' + lead_pos = line.index(',') + line = line[:lead_pos] + line[lead_pos + 1:] + lines[i] = line + + i += 1 + + return '\n'.join(lines) + def get_links_from_content(content): in_link = False in_description = False @@ -356,7 +376,7 @@ class Headline: end = line.linenum lines = self.get_lines_between(start + 1, end) - contents = "\n".join(lines) + contents = unescape_block_lines("\n".join(lines)) if contents.endswith("\n"): # This is not ideal, but to avoid having to do this maybe # the content parsing must be re-thinked @@ -708,13 +728,7 @@ class Headline: def get_lines_between(self, start, end): for line in self.contents: if start <= line.linenum < end: - text = [] - for item in line.contents: - if isinstance(item, str): - text.append(item) - elif isinstance(item, MarkerType): - text.append(ModeToMarker[item]) - yield "".join(text) + yield "".join(line.get_raw()) def get_contents(self, format): if format == "raw": @@ -776,7 +790,7 @@ class Headline: start, end = line_start, delimiter.linenum lines = self.get_lines_between(start + 1, end) - contents = "\n".join(lines) + contents = unescape_block_lines("\n".join(lines)) if contents.endswith("\n"): # This is not ideal, but to avoid having to do this maybe # the content parsing must be re-thinked diff --git a/tests/04-code.org b/tests/04-code.org index 161dc2f..956d961 100644 --- a/tests/04-code.org +++ b/tests/04-code.org @@ -36,3 +36,16 @@ exit 0 # Comment This is another test with two lines too :end: + +* Escaped code + :PROPERTIES: + :ID: 04-code-escaped-code-id + :CREATED: [2020-01-01 Wed 01:01] + :END: + + #+BEGIN_SRC c :results drawer +/* This code has to be escaped to + ,* avoid confusion with new headlines. + ,*/ +main(){} + #+END_SRC diff --git a/tests/test_org.py b/tests/test_org.py index da980c5..8afd6a0 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -433,7 +433,7 @@ class TestSerde(unittest.TestCase): doc = load(f) snippets = list(doc.get_code_snippets()) - self.assertEqual(len(snippets), 2) + self.assertEqual(len(snippets), 3) self.assertEqual( snippets[0].content, 'echo "This is a test"\n' @@ -456,6 +456,14 @@ class TestSerde(unittest.TestCase): snippets[1].result, "This is another test\n" + "with two lines too" ) + self.assertEqual( + snippets[2].content, + '/* This code has to be escaped to\n' + + ' * avoid confusion with new headlines.\n' + + ' */\n' + + 'main(){}', + ) + def test_mimic_write_file_05(self): with open(os.path.join(DIR, "05-dates.org")) as f: orig = f.read() From d67bae645b111c6b5f60a111b10240490bd82568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Wed, 4 Jan 2023 00:36:20 +0100 Subject: [PATCH 07/14] Fix string representation of PropertyNode. --- org_rw/dom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org_rw/dom.py b/org_rw/dom.py index 23ea44b..cb3d8fd 100644 --- a/org_rw/dom.py +++ b/org_rw/dom.py @@ -27,7 +27,7 @@ class PropertyNode: self.value = value def __repr__(self): - return "{{{}: {}}".format(self.key, self.value) + return "{{{}: {}}}".format(self.key, self.value) class ListGroupNode: From 70081245091cb7316ebfe973dfd561de739a3d67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Thu, 13 Apr 2023 23:56:58 +0200 Subject: [PATCH 08/14] Use more robust list de-indentation handler. --- org_rw/org_rw.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index e3534a8..822367b 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -457,17 +457,20 @@ class Headline: current_node = sublist indentation_tree.append(current_node) - while len(indentation_tree) > 0 and ( - (len(indentation_tree[-1].children) > 0) - and len( - [ - c - for c in indentation_tree[-1].children - if isinstance(c, dom.ListItem) - ][-1].orig.indentation - ) - > len(line.indentation) - ): + while len(indentation_tree) > 0: + list_children = [ + c + for c in indentation_tree[-1].children + if isinstance(c, dom.ListItem) + ] + + if ((len(list_children) > 0) + and (len(list_children[-1].orig.indentation) + <= len(line.indentation))): + # No more breaking out of lists, it's indentation + # is less than ours + break + rem = indentation_tree.pop(-1) if len(indentation_tree) == 0: indentation_tree.append(rem) From 302689a622eb55ad6dc04b408cee2eb544d99f54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 23 Apr 2023 20:48:57 +0200 Subject: [PATCH 09/14] Fix dom generated of table nested into list. --- org_rw/org_rw.py | 12 ++++++--- tests/10-tables.org | 15 +++++++++++ tests/test_org.py | 66 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 822367b..734851f 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -488,9 +488,15 @@ class Headline: tree.append(current_node) # TODO: Allow indentation of this element inside others indentation_tree = [current_node] - if not isinstance(current_node, dom.TableNode): - if not isinstance(current_node, dom.TableNode): - logging.warning("Expected a {}, found: {} on line {}".format(dom.TableNode, current_node, line.linenum)) + elif not isinstance(current_node, dom.TableNode): + if isinstance(current_node, dom.ListGroupNode): + # As an item inside a list + list_node = current_node + current_node = dom.TableNode() + list_node.append(current_node) + indentation_tree.append(current_node) + else: + logging.debug("Expected a {}, found: {} on line {}".format(dom.TableNode, current_node, line.linenum)) # This can happen. Frequently inside a LogDrawer if len(line.cells) > 0 and len(line.cells[0]) > 0 and line.cells[0][0] == '-': diff --git a/tests/10-tables.org b/tests/10-tables.org index d9d404b..a473bed 100644 --- a/tests/10-tables.org +++ b/tests/10-tables.org @@ -16,3 +16,18 @@ | Content2-1 | Content2-2 | Content2-3 | Content after the table. +** Indented table +:PROPERTIES: +:ID: 10-table-test-id-02-indented +:CREATED: [2020-01-01 Wed 01:01] +:END: + +- This table is indented inside a list item. + - Item before in list + + | Header1 | Header2 | Header3 | + |------------+------------+------------| + | Content1-1 | Content1-2 | Content1-3 (last cell unclosed) + | Content2-1 | Content2-2 | Content2-3 | + - Item after in list +- This item happens after the indented table. diff --git a/tests/test_org.py b/tests/test_org.py index 8afd6a0..1255067 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -4,7 +4,8 @@ import unittest from datetime import date from datetime import datetime as DT -from org_rw import MarkerToken, MarkerType, Timestamp, dumps, load, loads +from org_rw import MarkerToken, MarkerType, Timestamp, dumps, load, loads, dom +import org_rw from utils.assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE, UNDERLINED, VERBATIM, WEB_LINK, Doc, Tokens) @@ -681,3 +682,66 @@ class TestSerde(unittest.TestCase): self.assertEqual(first_table[0].cells[1].strip(), 'Header2') self.assertEqual(first_table[0].cells[2].strip(), 'Header3') + hl = hl.children[0] + + tables = hl.get_tables() + first_table = tables[0] + self.assertEqual(len(first_table), 4) + + print(first_table[0]) + self.assertEqual(len(first_table[0].cells), 3) + self.assertEqual(first_table[0].cells[0].strip(), 'Header1') + self.assertEqual(first_table[0].cells[1].strip(), 'Header2') + self.assertEqual(first_table[0].cells[2].strip(), 'Header3') + + def test_tables_html_file_10(self): + with open(os.path.join(DIR, "10-tables.org")) as f: + doc = load(f) + + hl = doc.getTopHeadlines()[0] + + tree = hl.as_dom() + non_props = [ + item + for item in tree + if not isinstance(item, dom.PropertyDrawerNode) + ] + self.assertTrue(isinstance(non_props[0], dom.Text) + and isinstance(non_props[1], dom.TableNode) + and isinstance(non_props[2], dom.Text), + 'Expected ') + + + hl = hl.children[0] + tree = hl.as_dom() + non_props = [ + item + for item in tree + if not (isinstance(item, dom.PropertyDrawerNode) + or isinstance(item, dom.Text)) + ] + print_tree(non_props) + self.assertTrue(len(non_props) == 1, + 'Expected , with only (1) element') + + +def print_tree(tree, indentation=0, headline=None): + for element in tree: + print(" " * indentation * 2, "EL:", element) + if "children" in dir(element): + if len(element.children) > 0: + print_element(element.children, indentation + 1, headline) + print() + + elif "content" in dir(element): + for content in element.content: + print_element(content, indentation + 1, headline) + + +def print_element(element, indentation, headline): + if isinstance(element, org_rw.Link): + print(" " * indentation * 2, "Link:", element.get_raw()) + elif isinstance(element, str): + print(" " * indentation * 2, "Str[" + element.replace('\n', '') + "]", type(element)) + else: + print_tree(element, indentation, headline) From 790ef57598858bd8a504116e4e2a30bf9edf4ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 23 Apr 2023 20:49:28 +0200 Subject: [PATCH 10/14] Add extra check for TimeRange validity. --- org_rw/org_rw.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 734851f..90ff004 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -1067,6 +1067,8 @@ def token_from_type(tok_type): class TimeRange: def __init__(self, start_time: OrgTime, end_time: OrgTime): + assert start_time is not None + assert end_time is not None self.start_time = start_time self.end_time = end_time From 36a0a00113fb20fcc16e7220ca8e81ce4bf104fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Mon, 1 May 2023 20:40:02 +0200 Subject: [PATCH 11/14] Fix nested lists DOM generation and add tests. --- org_rw/org_rw.py | 5 +++-- tests/11-nested-lists.org | 16 ++++++++++++++++ tests/test_org.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 tests/11-nested-lists.org diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 90ff004..c40f0e4 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -464,8 +464,9 @@ class Headline: if isinstance(c, dom.ListItem) ] - if ((len(list_children) > 0) - and (len(list_children[-1].orig.indentation) + if (len(list_children) == 0): + break + if ((len(list_children[-1].orig.indentation) <= len(line.indentation))): # No more breaking out of lists, it's indentation # is less than ours diff --git a/tests/11-nested-lists.org b/tests/11-nested-lists.org new file mode 100644 index 0000000..a4a8632 --- /dev/null +++ b/tests/11-nested-lists.org @@ -0,0 +1,16 @@ +#+TITLE: 11-Nested lists +#+DESCRIPTION: Simple org file to test nested lists +#+TODO: TODO(t) PAUSED(p) | DONE(d) + +* Nested lists + :PROPERTIES: + :ID: 11-nested-lists + :CREATED: [2020-01-01 Wed 01:01] + :END: + - 1 + - 1.1 + - 1.2 + - 2 + - 2.1 + - 2.2 + - 3 diff --git a/tests/test_org.py b/tests/test_org.py index 1255067..2f4200d 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -724,6 +724,39 @@ class TestSerde(unittest.TestCase): self.assertTrue(len(non_props) == 1, 'Expected , with only (1) element') + def test_nested_lists_html_file_11(self): + with open(os.path.join(DIR, "11-nested-lists.org")) as f: + doc = load(f) + + hl = doc.getTopHeadlines()[0] + + tree = hl.as_dom() + non_props = [ + item + for item in tree + if not isinstance(item, dom.PropertyDrawerNode) + ] + print_tree(non_props) + self.assertTrue((len(non_props) == 1) and (isinstance(non_props[0], dom.ListGroupNode)), + 'Expected only as top level') + + dom_list = non_props[0] + children = dom_list.children + self.assertTrue(len(children) == 5, 'Expected 5 items inside , 3 texts and 2 sublists') + + # Assert texts + self.assertEqual(children[0].content, ['1']) + self.assertEqual(children[2].content, ['2']) + self.assertEqual(children[4].content[0], '3') # Might be ['3', '\n'] but shouldn't be a breaking change + + # Assert lists + self.assertTrue(isinstance(children[1], dom.ListGroupNode), 'Expected sublist inside "1"') + self.assertEqual(children[1].children[0].content, ['1.1']) + self.assertEqual(children[1].children[1].content, ['1.2']) + self.assertTrue(isinstance(children[3], dom.ListGroupNode), 'Expected sublist inside "2"') + self.assertEqual(children[3].children[0].content, ['2.1']) + self.assertEqual(children[3].children[1].content, ['2.2']) + def print_tree(tree, indentation=0, headline=None): for element in tree: From cebe979066869fb0cd18bb6195244b759635dcf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Mon, 1 May 2023 20:40:15 +0200 Subject: [PATCH 12/14] Fix typo on lists test file. --- tests/06-lists.org | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/06-lists.org b/tests/06-lists.org index 0c5448e..1ee04e7 100644 --- a/tests/06-lists.org +++ b/tests/06-lists.org @@ -1,5 +1,5 @@ -#+TITLE: 06-Links -#+DESCRIPTION: Simple org file to test links +#+TITLE: 06-Lists +#+DESCRIPTION: Simple org file to test lists #+TODO: TODO(t) PAUSED(p) | DONE(d) From f11ecd05d6c55e55438ad3c701dec6cda8724ae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 30 Jul 2023 23:14:29 +0200 Subject: [PATCH 13/14] Fix: find links in list tags. --- org_rw/org_rw.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index c40f0e4..3d2c14d 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -733,6 +733,8 @@ class Headline: for lst in self.get_lists(): for item in lst: + if item.tag: + yield from get_links_from_content(item.tag) yield from get_links_from_content(item.content) def get_lines_between(self, start, end): From 2749a5caadbb09cebef5c26f9b31fb9275ce716c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 7 Oct 2023 13:14:13 +0200 Subject: [PATCH 14/14] Implement Headline.add_tag()/.create_headline_at_end(). --- org_rw/org_rw.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 3d2c14d..9a60199 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -680,6 +680,9 @@ class Headline: else: return list(self.shallow_tags) + self.parent.tags + def add_tag(self, tag: str): + self.shallow_tags.append(tag) + def get_property(self, name: str, default=None): for prop in self.properties: if prop.key == name: @@ -872,6 +875,35 @@ class Headline: return results + def create_headline_at_end(self) -> Headline: + headline = Headline( + start_line=1, + depth=self.depth + 1, + orig=None, + properties=[], + keywords=[], + priority_start=None, + priority=None, + title_start=None, + title="", + state="", + tags_start=None, + tags=[], + contents=[], + children=[], + structural=[], + delimiters=[], + list_items=[], + table_rows=[], + parent=self, + is_todo=False, + is_done=False, + spacing=" ", + ) + + self.children.append(headline) + return headline + RawLine = collections.namedtuple("RawLine", ("linenum", "line")) Keyword = collections.namedtuple( @@ -1884,7 +1916,12 @@ class OrgDoc: if headline.state: state = headline.state + " " - yield "*" * headline.depth + headline.spacing + state + token_list_to_raw(headline.title.contents) + tags + raw_title = token_list_to_raw(headline.title.contents) + tags_padding = "" + if not raw_title.endswith(" ") and tags: + tags_padding = " " + + yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags planning = headline.get_planning_line() if planning is not None: