diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 0021acd..29f2998 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -549,26 +549,14 @@ class Headline: last_line = None for li in self.list_items: - if last_line is None: - lists.append([li]) + if last_line == li.linenum - 1: + lists[-1].append(li) else: - num_lines = li.linenum - (last_line + 1) - lines_between = ''.join(['\n' + l - for l in self.get_lines_between(last_line + 1, li.linenum)] - ) + lists.append([li]) - # Only empty lines - if ((num_lines == lines_between.count('\n')) - and (len(lines_between.strip()) == 0) - ): - lists[-1].append(li) - else: - lists.append([li]) - - last_line = li.linenum + sum(c.count('\n') for c in li.content) + last_line = li.linenum return lists - # @DEPRECATED: use `get_lists` def getLists(self): return self.get_lists() @@ -850,34 +838,22 @@ Property = collections.namedtuple( "Property", ("linenum", "match", "key", "value", "options") ) -class ListItem: - def __init__(self, - linenum, match, - indentation, - bullet, counter, counter_sep, - checkbox_indentation, checkbox_value, - tag_indentation, tag, - content, - ): - self.linenum = linenum - self.match = match - self.indentation = indentation - self.bullet = bullet - self.counter = counter - self.counter_sep = counter_sep - self.checkbox_indentation = checkbox_indentation - self.checkbox_value = checkbox_value - self.tag_indentation = tag_indentation - self.tag = tag - self.content = content - - @property - def text_start_pos(self): - return len(self.indentation) + 1 # Indentation + bullet - - def append_line(self, line): - self.content += parse_content_block('\n' + line).contents - +ListItem = collections.namedtuple( + "ListItem", + ( + "linenum", + "match", + "indentation", + "bullet", + "counter", + "counter_sep", + "checkbox_indentation", + "checkbox_value", + "tag_indentation", + "tag", + "content", + ), +) TableRow = collections.namedtuple( "TableRow", ( @@ -1579,20 +1555,14 @@ def parse_contents(raw_contents: List[RawLine]): return [parse_content_block(block) for block in blocks] -def parse_content_block(raw_contents: Union[List[RawLine],str]): +def parse_content_block(raw_contents: List[RawLine]): contents_buff = [] - if isinstance(raw_contents, str): - contents_buff.append(raw_contents) - else: - for line in raw_contents: - contents_buff.append(line.line) + for line in raw_contents: + contents_buff.append(line.line) contents = "\n".join(contents_buff) tokens = tokenize_contents(contents) - if isinstance(raw_contents, str): - current_line = None - else: - current_line = raw_contents[0].linenum + current_line = raw_contents[0].linenum contents = [] # Use tokens to tag chunks of text with it's container type @@ -1619,9 +1589,7 @@ def dump_contents(raw): elif isinstance(raw, ListItem): bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep - content_full = token_list_to_raw(raw.content) - content_lines = content_full.split('\n') - content = '\n'.join(content_lines) + content = token_list_to_raw(raw.content) checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" return ( @@ -2010,19 +1978,19 @@ class OrgDocReader: def add_list_item_line(self, linenum: int, match: re.Match) -> int: li = ListItem( - linenum=linenum, - match=match, - indentation=match.group("indentation"), - bullet=match.group("bullet"), - counter=match.group("counter"), - counter_sep=match.group("counter_sep"), - checkbox_indentation=match.group("checkbox_indentation"), - checkbox_value=match.group("checkbox_value"), - tag_indentation=match.group("tag_indentation"), - tag=parse_content_block( + linenum, + match, + match.group("indentation"), + match.group("bullet"), + match.group("counter"), + match.group("counter_sep"), + match.group("checkbox_indentation"), + match.group("checkbox_value"), + match.group("tag_indentation"), + parse_content_block( [RawLine(linenum=linenum, line=match.group("tag"))] ).contents if match.group("tag") else None, - content=parse_content_block( + parse_content_block( [RawLine(linenum=linenum, line=match.group("content"))] ).contents, ) @@ -2031,7 +1999,6 @@ class OrgDocReader: self.list_items.append(li) else: self.headline_hierarchy[-1]["list_items"].append(li) - return li def add_table_line(self, linenum: int, line: str) -> int: chunks = line.split('|') @@ -2141,25 +2108,6 @@ class OrgDocReader: reader = enumerate(lines) in_drawer = False in_block = False - list_item_indentation = None - list_item = None - - def add_raw_line_with_possible_indentation(linenum, line): - added = False - nonlocal list_item - nonlocal list_item_indentation - if list_item: - if ((line[:list_item.text_start_pos].strip() == '') - or (len(line.strip()) == 0) - ): - list_item.append_line(line) - added = True - else: - list_item = None - list_item_indentation = None - - if not added: - self.add_raw_line(linenum, line) for lnum, line in reader: linenum = lnum + 1 @@ -2168,58 +2116,41 @@ class OrgDocReader: if m := END_BLOCK_RE.match(line): self.add_end_block_line(linenum, m) in_block = False - list_item_indentation = None - list_item = None else: - add_raw_line_with_possible_indentation(linenum, line) + self.add_raw_line(linenum, line) elif m := HEADLINE_RE.match(line): - list_item_indentation = None - list_item = None self.add_headline(linenum, m) elif m := LIST_ITEM_RE.match(line): - list_item = self.add_list_item_line(linenum, m) - list_item_indentation = m.group("indentation") + self.add_list_item_line(linenum, m) elif m := RAW_LINE_RE.match(line): - add_raw_line_with_possible_indentation(linenum, line) + self.add_raw_line(linenum, line) # Org-babel elif m := BEGIN_BLOCK_RE.match(line): self.add_begin_block_line(linenum, m) in_block = True - list_item_indentation = None - list_item = None elif m := END_BLOCK_RE.match(line): self.add_end_block_line(linenum, m) in_block = False - list_item_indentation = None - list_item = None # Generic properties elif m := KEYWORDS_RE.match(line): self.add_keyword_line(linenum, m) elif m := DRAWER_END_RE.match(line): self.add_drawer_end_line(linenum, line, m) in_drawer = False - list_item_indentation = None - list_item = None elif (not in_drawer) and (m := DRAWER_START_RE.match(line)): self.add_property_drawer_line(linenum, line, m) in_drawer = True - list_item_indentation = None - list_item = None elif (not in_drawer) and (m := RESULTS_DRAWER_RE.match(line)): self.add_results_drawer_line(linenum, line, m) in_drawer = True - list_item_indentation = None - list_item = None elif m := NODE_PROPERTIES_RE.match(line): self.add_node_properties_line(linenum, m) elif line.strip().startswith('|'): self.add_table_line(linenum, line) - list_item_indentation = None - list_item = None # Not captured else: - add_raw_line_with_possible_indentation(linenum, line) + self.add_raw_line(linenum, line) except: logging.error("Error line {}: {}".format(linenum + 1, line)) raise diff --git a/tests/06-lists.org b/tests/06-lists.org index 0c5448e..b80e358 100644 --- a/tests/06-lists.org +++ b/tests/06-lists.org @@ -51,25 +51,3 @@ Also with markup - _Key_ :: _Value_ - /Key/ 2 :: /Value/ 2 - -* List with multiline elements - :PROPERTIES: - :ID: 07-list-with-multiline-elements - :CREATED: [2020-01-01 Wed 01:01] - :END: - - - This is a list item... - that spans multiple lines - - - This is another list item... - that has content on multiple lines - - Text after a multiline element - - - This is another - multiline list - - #+begin_quote - With a block element inside - #+end_quote - diff --git a/tests/test_org.py b/tests/test_org.py index 21b6518..3509ffc 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -551,7 +551,7 @@ class TestSerde(unittest.TestCase): MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE), "markup", MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), - ".", "\n" + ".", ], ) @@ -567,7 +567,7 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[0][0].counter, "1") self.assertEqual(lists2[0][0].counter_sep, ".") - self.assertEqual(lists2[0][1].content, ["Second element", "\n"]) + self.assertEqual(lists2[0][1].content, ["Second element"]) self.assertEqual(lists2[0][1].counter, "2") self.assertEqual(lists2[0][1].counter_sep, ".") @@ -575,24 +575,10 @@ class TestSerde(unittest.TestCase): self.assertEqual(lists2[1][0].counter, "1") self.assertEqual(lists2[1][0].counter_sep, ")") - self.assertEqual(lists2[1][1].content, ["Second element", "\n"]) + self.assertEqual(lists2[1][1].content, ["Second element"]) self.assertEqual(lists2[1][1].counter, "2") self.assertEqual(lists2[1][1].counter_sep, ")") - hl4 = doc.getTopHeadlines()[3] - # ... - lists4 = hl4.getLists() - print(lists4) - self.assertEqual(len(lists4), 2) - - self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines", "\n"]) - self.assertEqual(lists4[0][0].bullet, "-") - self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines", "\n"]) - self.assertEqual(lists4[0][1].bullet, "-") - - self.assertEqual(lists4[1][0].content, ["This is another", "\n multiline list", "\n"]) - self.assertEqual(lists4[1][0].bullet, "-") - def test_org_roam_07(self): with open(os.path.join(DIR, "07-org-roam-v2.org")) as f: orig = f.read()