From 6c6c375572a99aeb18fd46bba97261773d734d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 12 Nov 2022 12:37:50 +0100 Subject: [PATCH 1/2] WIP: Add support for multiline list items. Right now this messes with line numbers. --- org_rw/org_rw.py | 131 +++++++++++++++++++++++++++++++-------------- tests/06-lists.org | 12 +++++ tests/test_org.py | 11 ++++ 3 files changed, 114 insertions(+), 40 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 29f2998..afba2f3 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -549,14 +549,26 @@ class Headline: last_line = None for li in self.list_items: - if last_line == li.linenum - 1: - lists[-1].append(li) - else: + if last_line is None: lists.append([li]) + else: + num_lines = li.linenum - (last_line + 1) + lines_between = ''.join(['\n' + l + for l in self.get_lines_between(last_line + 1, li.linenum)] + ) - last_line = li.linenum + # Only empty lines + if ((num_lines == lines_between.count('\n')) + and (len(lines_between.strip()) == 0) + ): + lists[-1].append(li) + else: + lists.append([li]) + + last_line = li.linenum + sum(c.count('\n') for c in li.content) return lists + # @DEPRECATED: use `get_lists` def getLists(self): return self.get_lists() @@ -838,22 +850,30 @@ Property = collections.namedtuple( "Property", ("linenum", "match", "key", "value", "options") ) -ListItem = collections.namedtuple( - "ListItem", - ( - "linenum", - "match", - "indentation", - "bullet", - "counter", - "counter_sep", - "checkbox_indentation", - "checkbox_value", - "tag_indentation", - "tag", - "content", - ), -) +class ListItem: + def __init__(self, + linenum, match, + indentation, + bullet, counter, counter_sep, + checkbox_indentation, checkbox_value, + tag_indentation, tag, + content, + ): + self.linenum = linenum + self.match = match + self.indentation = indentation + self.bullet = bullet + self.counter = counter + self.counter_sep = counter_sep + self.checkbox_indentation = checkbox_indentation + self.checkbox_value = checkbox_value + self.tag_indentation = tag_indentation + self.tag = tag + self.content = content + + def append_line(self, line): + self.content += parse_content_block('\n' + line[len(self.indentation):]).contents + TableRow = collections.namedtuple( "TableRow", ( @@ -1555,14 +1575,20 @@ def parse_contents(raw_contents: List[RawLine]): return [parse_content_block(block) for block in blocks] -def parse_content_block(raw_contents: List[RawLine]): +def parse_content_block(raw_contents: Union[List[RawLine],str]): contents_buff = [] - for line in raw_contents: - contents_buff.append(line.line) + if isinstance(raw_contents, str): + contents_buff.append(raw_contents) + else: + for line in raw_contents: + contents_buff.append(line.line) contents = "\n".join(contents_buff) tokens = tokenize_contents(contents) - current_line = raw_contents[0].linenum + if isinstance(raw_contents, str): + current_line = None + else: + current_line = raw_contents[0].linenum contents = [] # Use tokens to tag chunks of text with it's container type @@ -1589,7 +1615,12 @@ def dump_contents(raw): elif isinstance(raw, ListItem): bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep - content = token_list_to_raw(raw.content) + content_full = token_list_to_raw(raw.content) + content_lines = content_full.split('\n') + content = '\n'.join([content_lines[0], *[ + raw.indentation + line + for line in content_lines[1:] + ]]) checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" return ( @@ -1978,19 +2009,19 @@ class OrgDocReader: def add_list_item_line(self, linenum: int, match: re.Match) -> int: li = ListItem( - linenum, - match, - match.group("indentation"), - match.group("bullet"), - match.group("counter"), - match.group("counter_sep"), - match.group("checkbox_indentation"), - match.group("checkbox_value"), - match.group("tag_indentation"), - parse_content_block( + linenum=linenum, + match=match, + indentation=match.group("indentation"), + bullet=match.group("bullet"), + counter=match.group("counter"), + counter_sep=match.group("counter_sep"), + checkbox_indentation=match.group("checkbox_indentation"), + checkbox_value=match.group("checkbox_value"), + tag_indentation=match.group("tag_indentation"), + tag=parse_content_block( [RawLine(linenum=linenum, line=match.group("tag"))] ).contents if match.group("tag") else None, - parse_content_block( + content=parse_content_block( [RawLine(linenum=linenum, line=match.group("content"))] ).contents, ) @@ -1999,6 +2030,7 @@ class OrgDocReader: self.list_items.append(li) else: self.headline_hierarchy[-1]["list_items"].append(li) + return li def add_table_line(self, linenum: int, line: str) -> int: chunks = line.split('|') @@ -2108,6 +2140,22 @@ class OrgDocReader: reader = enumerate(lines) in_drawer = False in_block = False + list_item_indentation = None + list_item = None + + def add_raw_line_with_possible_indentation(linenum, line): + added = False + nonlocal list_item + nonlocal list_item_indentation + if list_item: + if line.startswith(list_item_indentation): + list_item.append_line(line) + added = True + elif len(line.strip()) > 0: + list_item = None + list_item_indentation = None + if not added: + self.add_raw_line(linenum, line) for lnum, line in reader: linenum = lnum + 1 @@ -2117,14 +2165,17 @@ class OrgDocReader: self.add_end_block_line(linenum, m) in_block = False else: - self.add_raw_line(linenum, line) + add_raw_line_with_possible_indentation(linenum, line) elif m := HEADLINE_RE.match(line): + list_item_indentation = None + list_item = None self.add_headline(linenum, m) elif m := LIST_ITEM_RE.match(line): - self.add_list_item_line(linenum, m) + list_item = self.add_list_item_line(linenum, m) + list_item_indentation = m.group("indentation") elif m := RAW_LINE_RE.match(line): - self.add_raw_line(linenum, line) + add_raw_line_with_possible_indentation(linenum, line) # Org-babel elif m := BEGIN_BLOCK_RE.match(line): self.add_begin_block_line(linenum, m) @@ -2150,7 +2201,7 @@ class OrgDocReader: self.add_table_line(linenum, line) # Not captured else: - self.add_raw_line(linenum, line) + add_raw_line_with_possible_indentation(linenum, line) except: logging.error("Error line {}: {}".format(linenum + 1, line)) raise diff --git a/tests/06-lists.org b/tests/06-lists.org index b80e358..af4b056 100644 --- a/tests/06-lists.org +++ b/tests/06-lists.org @@ -51,3 +51,15 @@ Also with markup - _Key_ :: _Value_ - /Key/ 2 :: /Value/ 2 + +* List with multiline elements + :PROPERTIES: + :ID: 07-list-with-multiline-elements + :CREATED: [2020-01-01 Wed 01:01] + :END: + + - This is a list item... + that spans multiple lines + + - This is another list item... + that has content on multiple lines diff --git a/tests/test_org.py b/tests/test_org.py index 3509ffc..1f642cb 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -579,6 +579,17 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[1][1].counter, "2") self.assertEqual(lists2[1][1].counter_sep, ")") + hl4 = doc.getTopHeadlines()[3] + # ... + lists4 = hl4.getLists() + print(lists4) + self.assertEqual(len(lists4), 1) + + self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines"]) + self.assertEqual(lists4[0][0].bullet, "-") + self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines"]) + self.assertEqual(lists4[0][1].bullet, "-") + def test_org_roam_07(self): with open(os.path.join(DIR, "07-org-roam-v2.org")) as f: orig = f.read() From b81990445729b61e6b3f8c9947351e52dae12962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sat, 12 Nov 2022 18:01:00 +0100 Subject: [PATCH 2/2] Fix handling of block elements after multiline list items. --- org_rw/org_rw.py | 32 +++++++++++++++++++++++++------- tests/06-lists.org | 10 ++++++++++ tests/test_org.py | 15 +++++++++------ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index afba2f3..0021acd 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -871,8 +871,12 @@ class ListItem: self.tag = tag self.content = content + @property + def text_start_pos(self): + return len(self.indentation) + 1 # Indentation + bullet + def append_line(self, line): - self.content += parse_content_block('\n' + line[len(self.indentation):]).contents + self.content += parse_content_block('\n' + line).contents TableRow = collections.namedtuple( "TableRow", @@ -1617,10 +1621,7 @@ def dump_contents(raw): bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep content_full = token_list_to_raw(raw.content) content_lines = content_full.split('\n') - content = '\n'.join([content_lines[0], *[ - raw.indentation + line - for line in content_lines[1:] - ]]) + content = '\n'.join(content_lines) checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" return ( @@ -2148,12 +2149,15 @@ class OrgDocReader: nonlocal list_item nonlocal list_item_indentation if list_item: - if line.startswith(list_item_indentation): + if ((line[:list_item.text_start_pos].strip() == '') + or (len(line.strip()) == 0) + ): list_item.append_line(line) added = True - elif len(line.strip()) > 0: + else: list_item = None list_item_indentation = None + if not added: self.add_raw_line(linenum, line) @@ -2164,6 +2168,8 @@ class OrgDocReader: if m := END_BLOCK_RE.match(line): self.add_end_block_line(linenum, m) in_block = False + list_item_indentation = None + list_item = None else: add_raw_line_with_possible_indentation(linenum, line) @@ -2180,25 +2186,37 @@ class OrgDocReader: elif m := BEGIN_BLOCK_RE.match(line): self.add_begin_block_line(linenum, m) in_block = True + list_item_indentation = None + list_item = None elif m := END_BLOCK_RE.match(line): self.add_end_block_line(linenum, m) in_block = False + list_item_indentation = None + list_item = None # Generic properties elif m := KEYWORDS_RE.match(line): self.add_keyword_line(linenum, m) elif m := DRAWER_END_RE.match(line): self.add_drawer_end_line(linenum, line, m) in_drawer = False + list_item_indentation = None + list_item = None elif (not in_drawer) and (m := DRAWER_START_RE.match(line)): self.add_property_drawer_line(linenum, line, m) in_drawer = True + list_item_indentation = None + list_item = None elif (not in_drawer) and (m := RESULTS_DRAWER_RE.match(line)): self.add_results_drawer_line(linenum, line, m) in_drawer = True + list_item_indentation = None + list_item = None elif m := NODE_PROPERTIES_RE.match(line): self.add_node_properties_line(linenum, m) elif line.strip().startswith('|'): self.add_table_line(linenum, line) + list_item_indentation = None + list_item = None # Not captured else: add_raw_line_with_possible_indentation(linenum, line) diff --git a/tests/06-lists.org b/tests/06-lists.org index af4b056..0c5448e 100644 --- a/tests/06-lists.org +++ b/tests/06-lists.org @@ -63,3 +63,13 @@ Also with markup - This is another list item... that has content on multiple lines + + Text after a multiline element + + - This is another + multiline list + + #+begin_quote + With a block element inside + #+end_quote + diff --git a/tests/test_org.py b/tests/test_org.py index 1f642cb..21b6518 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -551,7 +551,7 @@ class TestSerde(unittest.TestCase): MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE), "markup", MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), - ".", + ".", "\n" ], ) @@ -567,7 +567,7 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[0][0].counter, "1") self.assertEqual(lists2[0][0].counter_sep, ".") - self.assertEqual(lists2[0][1].content, ["Second element"]) + self.assertEqual(lists2[0][1].content, ["Second element", "\n"]) self.assertEqual(lists2[0][1].counter, "2") self.assertEqual(lists2[0][1].counter_sep, ".") @@ -575,7 +575,7 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[1][0].counter, "1") self.assertEqual(lists2[1][0].counter_sep, ")") - self.assertEqual(lists2[1][1].content, ["Second element"]) + self.assertEqual(lists2[1][1].content, ["Second element", "\n"]) self.assertEqual(lists2[1][1].counter, "2") self.assertEqual(lists2[1][1].counter_sep, ")") @@ -583,13 +583,16 @@ class TestSerde(unittest.TestCase): # ... lists4 = hl4.getLists() print(lists4) - self.assertEqual(len(lists4), 1) + self.assertEqual(len(lists4), 2) - self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines"]) + self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines", "\n"]) self.assertEqual(lists4[0][0].bullet, "-") - self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines"]) + self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines", "\n"]) self.assertEqual(lists4[0][1].bullet, "-") + self.assertEqual(lists4[1][0].content, ["This is another", "\n multiline list", "\n"]) + self.assertEqual(lists4[1][0].bullet, "-") + def test_org_roam_07(self): with open(os.path.join(DIR, "07-org-roam-v2.org")) as f: orig = f.read()