Compare commits

...

2 Commits

Author SHA1 Message Date
Sergio Martínez Portela
b819904457 Fix handling of block elements after multiline list items. 2022-11-12 18:01:19 +01:00
Sergio Martínez Portela
6c6c375572 WIP: Add support for multiline list items.
Right now this messes with line numbers.
2022-11-12 12:40:57 +01:00
3 changed files with 148 additions and 43 deletions

View File

@ -549,14 +549,26 @@ class Headline:
last_line = None last_line = None
for li in self.list_items: for li in self.list_items:
if last_line == li.linenum - 1: if last_line is None:
lists[-1].append(li)
else:
lists.append([li]) lists.append([li])
else:
num_lines = li.linenum - (last_line + 1)
lines_between = ''.join(['\n' + l
for l in self.get_lines_between(last_line + 1, li.linenum)]
)
last_line = li.linenum # Only empty lines
if ((num_lines == lines_between.count('\n'))
and (len(lines_between.strip()) == 0)
):
lists[-1].append(li)
else:
lists.append([li])
last_line = li.linenum + sum(c.count('\n') for c in li.content)
return lists return lists
# @DEPRECATED: use `get_lists`
def getLists(self): def getLists(self):
return self.get_lists() return self.get_lists()
@ -838,22 +850,34 @@ Property = collections.namedtuple(
"Property", ("linenum", "match", "key", "value", "options") "Property", ("linenum", "match", "key", "value", "options")
) )
ListItem = collections.namedtuple( class ListItem:
"ListItem", def __init__(self,
( linenum, match,
"linenum", indentation,
"match", bullet, counter, counter_sep,
"indentation", checkbox_indentation, checkbox_value,
"bullet", tag_indentation, tag,
"counter", content,
"counter_sep", ):
"checkbox_indentation", self.linenum = linenum
"checkbox_value", self.match = match
"tag_indentation", self.indentation = indentation
"tag", self.bullet = bullet
"content", self.counter = counter
), self.counter_sep = counter_sep
) self.checkbox_indentation = checkbox_indentation
self.checkbox_value = checkbox_value
self.tag_indentation = tag_indentation
self.tag = tag
self.content = content
@property
def text_start_pos(self):
return len(self.indentation) + 1 # Indentation + bullet
def append_line(self, line):
self.content += parse_content_block('\n' + line).contents
TableRow = collections.namedtuple( TableRow = collections.namedtuple(
"TableRow", "TableRow",
( (
@ -1555,14 +1579,20 @@ def parse_contents(raw_contents: List[RawLine]):
return [parse_content_block(block) for block in blocks] return [parse_content_block(block) for block in blocks]
def parse_content_block(raw_contents: List[RawLine]): def parse_content_block(raw_contents: Union[List[RawLine],str]):
contents_buff = [] contents_buff = []
for line in raw_contents: if isinstance(raw_contents, str):
contents_buff.append(line.line) contents_buff.append(raw_contents)
else:
for line in raw_contents:
contents_buff.append(line.line)
contents = "\n".join(contents_buff) contents = "\n".join(contents_buff)
tokens = tokenize_contents(contents) tokens = tokenize_contents(contents)
current_line = raw_contents[0].linenum if isinstance(raw_contents, str):
current_line = None
else:
current_line = raw_contents[0].linenum
contents = [] contents = []
# Use tokens to tag chunks of text with its container type # Use tokens to tag chunks of text with its container type
@ -1589,7 +1619,9 @@ def dump_contents(raw):
elif isinstance(raw, ListItem): elif isinstance(raw, ListItem):
bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep
content = token_list_to_raw(raw.content) content_full = token_list_to_raw(raw.content)
content_lines = content_full.split('\n')
content = '\n'.join(content_lines)
checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else ""
tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else ""
return ( return (
@ -1978,19 +2010,19 @@ class OrgDocReader:
def add_list_item_line(self, linenum: int, match: re.Match) -> int: def add_list_item_line(self, linenum: int, match: re.Match) -> int:
li = ListItem( li = ListItem(
linenum, linenum=linenum,
match, match=match,
match.group("indentation"), indentation=match.group("indentation"),
match.group("bullet"), bullet=match.group("bullet"),
match.group("counter"), counter=match.group("counter"),
match.group("counter_sep"), counter_sep=match.group("counter_sep"),
match.group("checkbox_indentation"), checkbox_indentation=match.group("checkbox_indentation"),
match.group("checkbox_value"), checkbox_value=match.group("checkbox_value"),
match.group("tag_indentation"), tag_indentation=match.group("tag_indentation"),
parse_content_block( tag=parse_content_block(
[RawLine(linenum=linenum, line=match.group("tag"))] [RawLine(linenum=linenum, line=match.group("tag"))]
).contents if match.group("tag") else None, ).contents if match.group("tag") else None,
parse_content_block( content=parse_content_block(
[RawLine(linenum=linenum, line=match.group("content"))] [RawLine(linenum=linenum, line=match.group("content"))]
).contents, ).contents,
) )
@ -1999,6 +2031,7 @@ class OrgDocReader:
self.list_items.append(li) self.list_items.append(li)
else: else:
self.headline_hierarchy[-1]["list_items"].append(li) self.headline_hierarchy[-1]["list_items"].append(li)
return li
def add_table_line(self, linenum: int, line: str) -> int: def add_table_line(self, linenum: int, line: str) -> int:
chunks = line.split('|') chunks = line.split('|')
@ -2108,6 +2141,25 @@ class OrgDocReader:
reader = enumerate(lines) reader = enumerate(lines)
in_drawer = False in_drawer = False
in_block = False in_block = False
list_item_indentation = None
list_item = None
def add_raw_line_with_possible_indentation(linenum, line):
added = False
nonlocal list_item
nonlocal list_item_indentation
if list_item:
if ((line[:list_item.text_start_pos].strip() == '')
or (len(line.strip()) == 0)
):
list_item.append_line(line)
added = True
else:
list_item = None
list_item_indentation = None
if not added:
self.add_raw_line(linenum, line)
for lnum, line in reader: for lnum, line in reader:
linenum = lnum + 1 linenum = lnum + 1
@ -2116,41 +2168,58 @@ class OrgDocReader:
if m := END_BLOCK_RE.match(line): if m := END_BLOCK_RE.match(line):
self.add_end_block_line(linenum, m) self.add_end_block_line(linenum, m)
in_block = False in_block = False
list_item_indentation = None
list_item = None
else: else:
self.add_raw_line(linenum, line) add_raw_line_with_possible_indentation(linenum, line)
elif m := HEADLINE_RE.match(line): elif m := HEADLINE_RE.match(line):
list_item_indentation = None
list_item = None
self.add_headline(linenum, m) self.add_headline(linenum, m)
elif m := LIST_ITEM_RE.match(line): elif m := LIST_ITEM_RE.match(line):
self.add_list_item_line(linenum, m) list_item = self.add_list_item_line(linenum, m)
list_item_indentation = m.group("indentation")
elif m := RAW_LINE_RE.match(line): elif m := RAW_LINE_RE.match(line):
self.add_raw_line(linenum, line) add_raw_line_with_possible_indentation(linenum, line)
# Org-babel # Org-babel
elif m := BEGIN_BLOCK_RE.match(line): elif m := BEGIN_BLOCK_RE.match(line):
self.add_begin_block_line(linenum, m) self.add_begin_block_line(linenum, m)
in_block = True in_block = True
list_item_indentation = None
list_item = None
elif m := END_BLOCK_RE.match(line): elif m := END_BLOCK_RE.match(line):
self.add_end_block_line(linenum, m) self.add_end_block_line(linenum, m)
in_block = False in_block = False
list_item_indentation = None
list_item = None
# Generic properties # Generic properties
elif m := KEYWORDS_RE.match(line): elif m := KEYWORDS_RE.match(line):
self.add_keyword_line(linenum, m) self.add_keyword_line(linenum, m)
elif m := DRAWER_END_RE.match(line): elif m := DRAWER_END_RE.match(line):
self.add_drawer_end_line(linenum, line, m) self.add_drawer_end_line(linenum, line, m)
in_drawer = False in_drawer = False
list_item_indentation = None
list_item = None
elif (not in_drawer) and (m := DRAWER_START_RE.match(line)): elif (not in_drawer) and (m := DRAWER_START_RE.match(line)):
self.add_property_drawer_line(linenum, line, m) self.add_property_drawer_line(linenum, line, m)
in_drawer = True in_drawer = True
list_item_indentation = None
list_item = None
elif (not in_drawer) and (m := RESULTS_DRAWER_RE.match(line)): elif (not in_drawer) and (m := RESULTS_DRAWER_RE.match(line)):
self.add_results_drawer_line(linenum, line, m) self.add_results_drawer_line(linenum, line, m)
in_drawer = True in_drawer = True
list_item_indentation = None
list_item = None
elif m := NODE_PROPERTIES_RE.match(line): elif m := NODE_PROPERTIES_RE.match(line):
self.add_node_properties_line(linenum, m) self.add_node_properties_line(linenum, m)
elif line.strip().startswith('|'): elif line.strip().startswith('|'):
self.add_table_line(linenum, line) self.add_table_line(linenum, line)
list_item_indentation = None
list_item = None
# Not captured # Not captured
else: else:
self.add_raw_line(linenum, line) add_raw_line_with_possible_indentation(linenum, line)
except: except:
logging.error("Error line {}: {}".format(linenum + 1, line)) logging.error("Error line {}: {}".format(linenum + 1, line))
raise raise

View File

@ -51,3 +51,25 @@ Also with markup
- _Key_ :: _Value_ - _Key_ :: _Value_
- /Key/ 2 :: /Value/ 2 - /Key/ 2 :: /Value/ 2
* List with multiline elements
:PROPERTIES:
:ID: 07-list-with-multiline-elements
:CREATED: [2020-01-01 Wed 01:01]
:END:
- This is a list item...
that spans multiple lines
- This is another list item...
that has content on multiple lines
Text after a multiline element
- This is another
multiline list
#+begin_quote
With a block element inside
#+end_quote

View File

@ -551,7 +551,7 @@ class TestSerde(unittest.TestCase):
MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE), MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE),
"markup", "markup",
MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE),
".", ".", "\n"
], ],
) )
@ -567,7 +567,7 @@ class TestSerde(unittest.TestCase):
self.assertEqual(lists2[0][0].counter, "1") self.assertEqual(lists2[0][0].counter, "1")
self.assertEqual(lists2[0][0].counter_sep, ".") self.assertEqual(lists2[0][0].counter_sep, ".")
self.assertEqual(lists2[0][1].content, ["Second element"]) self.assertEqual(lists2[0][1].content, ["Second element", "\n"])
self.assertEqual(lists2[0][1].counter, "2") self.assertEqual(lists2[0][1].counter, "2")
self.assertEqual(lists2[0][1].counter_sep, ".") self.assertEqual(lists2[0][1].counter_sep, ".")
@ -575,10 +575,24 @@ class TestSerde(unittest.TestCase):
self.assertEqual(lists2[1][0].counter, "1") self.assertEqual(lists2[1][0].counter, "1")
self.assertEqual(lists2[1][0].counter_sep, ")") self.assertEqual(lists2[1][0].counter_sep, ")")
self.assertEqual(lists2[1][1].content, ["Second element"]) self.assertEqual(lists2[1][1].content, ["Second element", "\n"])
self.assertEqual(lists2[1][1].counter, "2") self.assertEqual(lists2[1][1].counter, "2")
self.assertEqual(lists2[1][1].counter_sep, ")") self.assertEqual(lists2[1][1].counter_sep, ")")
hl4 = doc.getTopHeadlines()[3]
# ...
lists4 = hl4.getLists()
print(lists4)
self.assertEqual(len(lists4), 2)
self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines", "\n"])
self.assertEqual(lists4[0][0].bullet, "-")
self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines", "\n"])
self.assertEqual(lists4[0][1].bullet, "-")
self.assertEqual(lists4[1][0].content, ["This is another", "\n multiline list", "\n"])
self.assertEqual(lists4[1][0].bullet, "-")
def test_org_roam_07(self): def test_org_roam_07(self):
with open(os.path.join(DIR, "07-org-roam-v2.org")) as f: with open(os.path.join(DIR, "07-org-roam-v2.org")) as f:
orig = f.read() orig = f.read()