feat: Keep headline whitespace info & remove it from title text.

2025-04-16 00:37:38 +02:00 · 2025-04-16 00:37:38 +02:00 · 527a9e7eb2
commit 527a9e7eb2
parent 9c54f83ec7
1 changed files with 20 additions and 8 deletions
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@ -67,7 +67,7 @@ BASE_ENVIRONMENT = {
    ),
 }

-HEADLINE_TAGS_RE = re.compile(r"((:(\w|[0-9_@#%])+)+:)\s*$")
+HEADLINE_TAGS_RE = re.compile(r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$")
 HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
 KEYWORDS_RE = re.compile(
    r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@ -315,6 +315,8 @@ class Headline:
        state,
        tags_start,
        tags,
+        space_before_tags,
+        space_after_tags,
        contents,
        children,
        structural,
@ -340,6 +342,8 @@ class Headline:
        self.title = parse_content_block([RawLine(linenum=start_line, line=title)])
        self._state = state
        self.tags_start = tags_start
+        self.space_before_tags = space_before_tags
+        self.space_after_tags = space_after_tags
        self.shallow_tags = tags
        self.contents = contents
        self.children = children
@ -2182,8 +2186,11 @@ def parse_headline(hl, doc, parent) -> Headline:

    if hl_tags is None:
        tags = []
+        space_before_tags = space_after_tags = ''
    else:
-        tags = hl_tags.group(0)[1:-1].split(":")
+        tags = hl_tags.group(0).strip()[1:-1].split(":")
+        space_before_tags = hl_tags.group('space_before_tags') or ''
+        space_after_tags = hl_tags.group('space_after_tags') or ''
        line = HEADLINE_TAGS_RE.sub("", line)

    hl_state = None
@ -2203,6 +2210,13 @@ def parse_headline(hl, doc, parent) -> Headline:
                is_done = True
                break

+    if len(tags) == 0:
+        # No tags, so the title may end with trailing whitespace: move it into space_before_tags
+        title_ends_with_whitespace_match = re.search(r'\s+$', title)
+        if title_ends_with_whitespace_match is not None:
+            space_before_tags = title_ends_with_whitespace_match.group(0)
+            title = title[:-len(space_before_tags)]
+
    contents = parse_contents(hl["contents"])

    if not (isinstance(parent, OrgDoc) or depth > parent.depth):
@ -2229,6 +2243,8 @@ def parse_headline(hl, doc, parent) -> Headline:
        priority_start=None,
        tags_start=None,
        tags=tags,
+        space_before_tags=space_before_tags,
+        space_after_tags=space_after_tags,
        parent=parent,
        is_todo=is_todo,
        is_done=is_done,
@ -2430,25 +2446,21 @@ class OrgDoc:

    # Writing
    def dump_headline(self, headline, recursive=True):
-        tags = ""
+        tags = headline.space_before_tags
        if len(headline.shallow_tags) > 0:
-            tags = ":" + ":".join(headline.shallow_tags) + ":"
+            tags += ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags

        state = ""
        if headline._state:
            state = headline._state["name"] + " "

        raw_title = token_list_to_raw(headline.title.contents)
-        tags_padding = ""
-        if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
-            tags_padding = " "

        yield (
            "*" * headline.depth
            + headline.spacing
            + state
            + raw_title
-            + tags_padding
            + tags
        )