From 6c6c375572a99aeb18fd46bba97261773d734d86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Sat, 12 Nov 2022 12:37:50 +0100
Subject: [PATCH 1/2] WIP: Add support for multiline list items.

Known issue: line numbers are not yet handled correctly with this change.
---
 org_rw/org_rw.py   | 131 +++++++++++++++++++++++++++++++--------------
 tests/06-lists.org |  12 +++++
 tests/test_org.py  |  11 ++++
 3 files changed, 114 insertions(+), 40 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index 29f2998..afba2f3 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -549,14 +549,26 @@ class Headline:
         last_line = None
 
         for li in self.list_items:
-            if last_line == li.linenum - 1:
-                lists[-1].append(li)
-            else:
+            if last_line is None:
                 lists.append([li])
+            else:
+                num_lines = li.linenum - (last_line + 1)
+                lines_between = ''.join(['\n' + l
+                                         for l in self.get_lines_between(last_line + 1, li.linenum)]
+                                        )
 
-            last_line = li.linenum
+                # Only empty lines
+                if ((num_lines == lines_between.count('\n'))
+                    and (len(lines_between.strip()) == 0)
+                ):
+                    lists[-1].append(li)
+                else:
+                    lists.append([li])
+
+            last_line = li.linenum + sum(c.count('\n') for c in li.content)
         return lists
 
+    # @DEPRECATED: use `get_lists`
     def getLists(self):
         return self.get_lists()
 
@@ -838,22 +850,30 @@ Property = collections.namedtuple(
     "Property", ("linenum", "match", "key", "value", "options")
 )
 
-ListItem = collections.namedtuple(
-    "ListItem",
-    (
-        "linenum",
-        "match",
-        "indentation",
-        "bullet",
-        "counter",
-        "counter_sep",
-        "checkbox_indentation",
-        "checkbox_value",
-        "tag_indentation",
-        "tag",
-        "content",
-    ),
-)
+class ListItem:
+    def __init__(self,
+        linenum, match,
+        indentation,
+        bullet, counter, counter_sep,
+        checkbox_indentation, checkbox_value,
+        tag_indentation, tag,
+        content,
+    ):
+        self.linenum = linenum
+        self.match = match
+        self.indentation = indentation
+        self.bullet = bullet
+        self.counter = counter
+        self.counter_sep = counter_sep
+        self.checkbox_indentation = checkbox_indentation
+        self.checkbox_value = checkbox_value
+        self.tag_indentation = tag_indentation
+        self.tag = tag
+        self.content = content
+
+    def append_line(self, line):
+        self.content += parse_content_block('\n' + line[len(self.indentation):]).contents
+
 TableRow = collections.namedtuple(
     "TableRow",
     (
@@ -1555,14 +1575,20 @@ def parse_contents(raw_contents: List[RawLine]):
     return [parse_content_block(block) for block in blocks]
 
 
-def parse_content_block(raw_contents: List[RawLine]):
+def parse_content_block(raw_contents: Union[List[RawLine],str]):
     contents_buff = []
-    for line in raw_contents:
-        contents_buff.append(line.line)
+    if isinstance(raw_contents, str):
+        contents_buff.append(raw_contents)
+    else:
+        for line in raw_contents:
+            contents_buff.append(line.line)
 
     contents = "\n".join(contents_buff)
     tokens = tokenize_contents(contents)
-    current_line = raw_contents[0].linenum
+    if isinstance(raw_contents, str):
+        current_line = None
+    else:
+        current_line = raw_contents[0].linenum
 
     contents = []
     # Use tokens to tag chunks of text with it's container type
@@ -1589,7 +1615,12 @@ def dump_contents(raw):
 
     elif isinstance(raw, ListItem):
         bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep
-        content = token_list_to_raw(raw.content)
+        content_full = token_list_to_raw(raw.content)
+        content_lines = content_full.split('\n')
+        content = '\n'.join([content_lines[0], *[
+            raw.indentation + line
+            for line in content_lines[1:]
+        ]])
         checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else ""
         tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else ""
         return (
@@ -1978,19 +2009,19 @@ class OrgDocReader:
 
     def add_list_item_line(self, linenum: int, match: re.Match) -> int:
         li = ListItem(
-            linenum,
-            match,
-            match.group("indentation"),
-            match.group("bullet"),
-            match.group("counter"),
-            match.group("counter_sep"),
-            match.group("checkbox_indentation"),
-            match.group("checkbox_value"),
-            match.group("tag_indentation"),
-            parse_content_block(
+            linenum=linenum,
+            match=match,
+            indentation=match.group("indentation"),
+            bullet=match.group("bullet"),
+            counter=match.group("counter"),
+            counter_sep=match.group("counter_sep"),
+            checkbox_indentation=match.group("checkbox_indentation"),
+            checkbox_value=match.group("checkbox_value"),
+            tag_indentation=match.group("tag_indentation"),
+            tag=parse_content_block(
                 [RawLine(linenum=linenum, line=match.group("tag"))]
             ).contents if match.group("tag") else None,
-            parse_content_block(
+            content=parse_content_block(
                 [RawLine(linenum=linenum, line=match.group("content"))]
             ).contents,
         )
@@ -1999,6 +2030,7 @@ class OrgDocReader:
             self.list_items.append(li)
         else:
             self.headline_hierarchy[-1]["list_items"].append(li)
+        return li
 
     def add_table_line(self, linenum: int, line: str) -> int:
         chunks = line.split('|')
@@ -2108,6 +2140,22 @@ class OrgDocReader:
         reader = enumerate(lines)
         in_drawer = False
         in_block = False
+        list_item_indentation = None
+        list_item = None
+
+        def add_raw_line_with_possible_indentation(linenum, line):
+            added = False
+            nonlocal list_item
+            nonlocal list_item_indentation
+            if list_item:
+                if line.startswith(list_item_indentation):
+                    list_item.append_line(line)
+                    added = True
+                elif len(line.strip()) > 0:
+                    list_item = None
+                    list_item_indentation = None
+            if not added:
+                self.add_raw_line(linenum, line)
 
         for lnum, line in reader:
             linenum = lnum + 1
@@ -2117,14 +2165,17 @@ class OrgDocReader:
                         self.add_end_block_line(linenum, m)
                         in_block = False
                     else:
-                        self.add_raw_line(linenum, line)
+                        add_raw_line_with_possible_indentation(linenum, line)
 
                 elif m := HEADLINE_RE.match(line):
+                    list_item_indentation = None
+                    list_item = None
                     self.add_headline(linenum, m)
                 elif m := LIST_ITEM_RE.match(line):
-                    self.add_list_item_line(linenum, m)
+                    list_item = self.add_list_item_line(linenum, m)
+                    list_item_indentation = m.group("indentation")
                 elif m := RAW_LINE_RE.match(line):
-                    self.add_raw_line(linenum, line)
+                    add_raw_line_with_possible_indentation(linenum, line)
                 # Org-babel
                 elif m := BEGIN_BLOCK_RE.match(line):
                     self.add_begin_block_line(linenum, m)
@@ -2150,7 +2201,7 @@ class OrgDocReader:
                     self.add_table_line(linenum, line)
                 # Not captured
                 else:
-                    self.add_raw_line(linenum, line)
+                    add_raw_line_with_possible_indentation(linenum, line)
             except:
                 logging.error("Error line {}: {}".format(linenum + 1, line))
                 raise
diff --git a/tests/06-lists.org b/tests/06-lists.org
index b80e358..af4b056 100644
--- a/tests/06-lists.org
+++ b/tests/06-lists.org
@@ -51,3 +51,15 @@ Also with markup
 
 - _Key_ :: _Value_
 - /Key/ 2 :: /Value/ 2
+
+* List with multiline elements
+  :PROPERTIES:
+  :ID:       07-list-with-multiline-elements
+  :CREATED:  [2020-01-01 Wed 01:01]
+  :END:
+
+  - This is a list item...
+    that spans multiple lines
+
+  - This is another list item...
+    that has content on multiple lines
diff --git a/tests/test_org.py b/tests/test_org.py
index 3509ffc..1f642cb 100644
--- a/tests/test_org.py
+++ b/tests/test_org.py
@@ -579,6 +579,17 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(lists2[1][1].counter, "2")
         self.assertEqual(lists2[1][1].counter_sep, ")")
 
+        hl4 = doc.getTopHeadlines()[3]
+        # ...
+        lists4 = hl4.getLists()
+        print(lists4)
+        self.assertEqual(len(lists4), 1)
+
+        self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n  that spans multiple lines"])
+        self.assertEqual(lists4[0][0].bullet, "-")
+        self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n  that has content on multiple lines"])
+        self.assertEqual(lists4[0][1].bullet, "-")
+
     def test_org_roam_07(self):
         with open(os.path.join(DIR, "07-org-roam-v2.org")) as f:
             orig = f.read()

From b81990445729b61e6b3f8c9947351e52dae12962 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Sat, 12 Nov 2022 18:01:00 +0100
Subject: [PATCH 2/2] Fix handling of block elements after multiline list
 items.

---
 org_rw/org_rw.py   | 32 +++++++++++++++++++++++++-------
 tests/06-lists.org | 10 ++++++++++
 tests/test_org.py  | 15 +++++++++------
 3 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index afba2f3..0021acd 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -871,8 +871,12 @@ class ListItem:
         self.tag = tag
         self.content = content
 
+    @property
+    def text_start_pos(self):
+        return len(self.indentation) + 1 # Indentation + bullet
+
     def append_line(self, line):
-        self.content += parse_content_block('\n' + line[len(self.indentation):]).contents
+        self.content += parse_content_block('\n' + line).contents
 
 TableRow = collections.namedtuple(
     "TableRow",
@@ -1617,10 +1621,7 @@ def dump_contents(raw):
         bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep
         content_full = token_list_to_raw(raw.content)
         content_lines = content_full.split('\n')
-        content = '\n'.join([content_lines[0], *[
-            raw.indentation + line
-            for line in content_lines[1:]
-        ]])
+        content = '\n'.join(content_lines)
         checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else ""
         tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else ""
         return (
@@ -2148,12 +2149,15 @@ class OrgDocReader:
             nonlocal list_item
             nonlocal list_item_indentation
             if list_item:
-                if line.startswith(list_item_indentation):
+                if ((line[:list_item.text_start_pos].strip() == '')
+                    or (len(line.strip()) == 0)
+                ):
                     list_item.append_line(line)
                     added = True
-                elif len(line.strip()) > 0:
+                else:
                     list_item = None
                     list_item_indentation = None
+
             if not added:
                 self.add_raw_line(linenum, line)
 
@@ -2164,6 +2168,8 @@ class OrgDocReader:
                     if m := END_BLOCK_RE.match(line):
                         self.add_end_block_line(linenum, m)
                         in_block = False
+                        list_item_indentation = None
+                        list_item = None
                     else:
                         add_raw_line_with_possible_indentation(linenum, line)
 
@@ -2180,25 +2186,37 @@ class OrgDocReader:
                 elif m := BEGIN_BLOCK_RE.match(line):
                     self.add_begin_block_line(linenum, m)
                     in_block = True
+                    list_item_indentation = None
+                    list_item = None
                 elif m := END_BLOCK_RE.match(line):
                     self.add_end_block_line(linenum, m)
                     in_block = False
+                    list_item_indentation = None
+                    list_item = None
                 # Generic properties
                 elif m := KEYWORDS_RE.match(line):
                     self.add_keyword_line(linenum, m)
                 elif m := DRAWER_END_RE.match(line):
                     self.add_drawer_end_line(linenum, line, m)
                     in_drawer = False
+                    list_item_indentation = None
+                    list_item = None
                 elif (not in_drawer) and (m := DRAWER_START_RE.match(line)):
                     self.add_property_drawer_line(linenum, line, m)
                     in_drawer = True
+                    list_item_indentation = None
+                    list_item = None
                 elif (not in_drawer) and (m := RESULTS_DRAWER_RE.match(line)):
                     self.add_results_drawer_line(linenum, line, m)
                     in_drawer = True
+                    list_item_indentation = None
+                    list_item = None
                 elif m := NODE_PROPERTIES_RE.match(line):
                     self.add_node_properties_line(linenum, m)
                 elif line.strip().startswith('|'):
                     self.add_table_line(linenum, line)
+                    list_item_indentation = None
+                    list_item = None
                 # Not captured
                 else:
                     add_raw_line_with_possible_indentation(linenum, line)
diff --git a/tests/06-lists.org b/tests/06-lists.org
index af4b056..0c5448e 100644
--- a/tests/06-lists.org
+++ b/tests/06-lists.org
@@ -63,3 +63,13 @@ Also with markup
 
   - This is another list item...
     that has content on multiple lines
+
+  Text after a multiline element
+
+  - This is another
+    multiline list
+
+    #+begin_quote
+    With a block element inside
+    #+end_quote
+
diff --git a/tests/test_org.py b/tests/test_org.py
index 1f642cb..21b6518 100644
--- a/tests/test_org.py
+++ b/tests/test_org.py
@@ -551,7 +551,7 @@ class TestSerde(unittest.TestCase):
                 MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE),
                 "markup",
                 MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE),
-                ".",
+                ".", "\n"
             ],
         )
 
@@ -567,7 +567,7 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(lists2[0][0].counter, "1")
         self.assertEqual(lists2[0][0].counter_sep, ".")
 
-        self.assertEqual(lists2[0][1].content, ["Second element"])
+        self.assertEqual(lists2[0][1].content, ["Second element", "\n"])
         self.assertEqual(lists2[0][1].counter, "2")
         self.assertEqual(lists2[0][1].counter_sep, ".")
 
@@ -575,7 +575,7 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(lists2[1][0].counter, "1")
         self.assertEqual(lists2[1][0].counter_sep, ")")
 
-        self.assertEqual(lists2[1][1].content, ["Second element"])
+        self.assertEqual(lists2[1][1].content, ["Second element", "\n"])
         self.assertEqual(lists2[1][1].counter, "2")
         self.assertEqual(lists2[1][1].counter_sep, ")")
 
@@ -583,13 +583,16 @@ class TestSerde(unittest.TestCase):
         # ...
         lists4 = hl4.getLists()
         print(lists4)
-        self.assertEqual(len(lists4), 1)
+        self.assertEqual(len(lists4), 2)
 
-        self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n  that spans multiple lines"])
+        self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n    that spans multiple lines", "\n"])
         self.assertEqual(lists4[0][0].bullet, "-")
-        self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n  that has content on multiple lines"])
+        self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n    that has content on multiple lines", "\n"])
         self.assertEqual(lists4[0][1].bullet, "-")
 
+        self.assertEqual(lists4[1][0].content, ["This is another", "\n    multiline list", "\n"])
+        self.assertEqual(lists4[1][0].bullet, "-")
+
     def test_org_roam_07(self):
         with open(os.path.join(DIR, "07-org-roam-v2.org")) as f:
             orig = f.read()