From 123f5c911541928c3d40f26afb1feeb5f20dcc91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:05:24 +0200
Subject: [PATCH 1/5] test: Propose tests for title parsing changes.

---
 tests/14-titles.org | 12 ++++++++++++
 tests/test_org.py   | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 tests/14-titles.org
diff --git a/tests/14-titles.org b/tests/14-titles.org
new file mode 100644
index 0000000..75b88a7
--- /dev/null
+++ b/tests/14-titles.org
@@ -0,0 +1,12 @@
+#+TITLE: 14-Simple
+#+DESCRIPTION: Org file to evaluate titles
+#+TODO: TODO(t) PAUSED(p) |  DONE(d)
+
+
+* Simple title
+
+* Simple title with tags :tag:
+
+* Simple title with trailing space 
+
+*  Simple title with leading space
diff --git a/tests/test_org.py b/tests/test_org.py
index a1fdff1..d6b4351 100644
--- a/tests/test_org.py
+++ b/tests/test_org.py
@@ -955,6 +955,24 @@ class TestSerde(unittest.TestCase):
         h1_2_h2 = h1_2.children[0]
         self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"])
 
+    def test_titles_file(self):
+        with open(os.path.join(DIR, "14-titles.org")) as f:
+            doc = load(f)
+
+        h1, h2, h3, h4 = doc.getTopHeadlines()
+        self.assertEqual(h1.title.get_text(), "Simple title")
+        self.assertEqual(h2.title.get_text(), "Simple title with tags")
+        self.assertEqual(h3.title.get_text(), "Simple title with trailing space")
+        self.assertEqual(h4.title.get_text(), "Simple title with leading space")
+
+    def test_mimic_write_file_14(self):
+        """A goal of this library is to be able to update a file without changing parts not directly modified."""
+        with open(os.path.join(DIR, "14-titles.org")) as f:
+            orig = f.read()
+            doc = loads(orig)
+
+        self.assertEqual(dumps(doc), orig)
+
     def test_update_headline_from_none_to_todo(self):
         orig = "* First entry"
         doc = loads(orig)
-- 
2.47.2


From 9c54f83ec7f4f868156bfc259a3e602e7d4fa083 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:06:00 +0200
Subject: [PATCH 2/5] revert: Remove old implementation change.

This is reverted because it doesn't accurately return the information that's in the org-mode file.
---
 org_rw/org_rw.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index a6ba0da..bc9657a 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -1816,7 +1816,7 @@ def token_list_to_plaintext(tok_list) -> str:
         else:
             assert isinstance(chunk, MarkerToken)
 
-    return "".join(contents).strip()
+    return "".join(contents)
 
 
 def token_list_to_raw(tok_list):
-- 
2.47.2


From 527a9e7eb24599b15edadf5e41b66320ccaf5e85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:37:38 +0200
Subject: [PATCH 3/5] feat: Keep headline whitespace info & remove it from
 title text.

---
 org_rw/org_rw.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index bc9657a..5c00e75 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -67,7 +67,7 @@ BASE_ENVIRONMENT = {
     ),
 }
 
-HEADLINE_TAGS_RE = re.compile(r"((:(\w|[0-9_@#%])+)+:)\s*$")
+HEADLINE_TAGS_RE = re.compile(r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$")
 HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
 KEYWORDS_RE = re.compile(
     r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@@ -315,6 +315,8 @@ class Headline:
         state,
         tags_start,
         tags,
+        space_before_tags,
+        space_after_tags,
         contents,
         children,
         structural,
@@ -340,6 +342,8 @@ class Headline:
         self.title = parse_content_block([RawLine(linenum=start_line, line=title)])
         self._state = state
         self.tags_start = tags_start
+        self.space_before_tags = space_before_tags
+        self.space_after_tags = space_after_tags
         self.shallow_tags = tags
         self.contents = contents
         self.children = children
@@ -2182,8 +2186,11 @@ def parse_headline(hl, doc, parent) -> Headline:
 
     if hl_tags is None:
         tags = []
+        space_before_tags = space_after_tags = ''
     else:
-        tags = hl_tags.group(0)[1:-1].split(":")
+        tags = hl_tags.group(0).strip()[1:-1].split(":")
+        space_before_tags = hl_tags.group('space_before_tags') or ''
+        space_after_tags = hl_tags.group('space_after_tags') or ''
         line = HEADLINE_TAGS_RE.sub("", line)
 
     hl_state = None
@@ -2203,6 +2210,13 @@ def parse_headline(hl, doc, parent) -> Headline:
                 is_done = True
                 break
 
+    if len(tags) == 0:
+        # No tags, so title might contain trailing whitespaces, handle it
+        title_ends_with_whitespace_match = re.search(r'\s+$', title)
+        if title_ends_with_whitespace_match is not None:
+            space_before_tags = title_ends_with_whitespace_match.group(0)
+            title = title[:-len(space_before_tags)]
+
     contents = parse_contents(hl["contents"])
 
     if not (isinstance(parent, OrgDoc) or depth > parent.depth):
@@ -2229,6 +2243,8 @@ def parse_headline(hl, doc, parent) -> Headline:
         priority_start=None,
         tags_start=None,
         tags=tags,
+        space_before_tags=space_before_tags,
+        space_after_tags=space_after_tags,
         parent=parent,
         is_todo=is_todo,
         is_done=is_done,
@@ -2430,25 +2446,21 @@ class OrgDoc:
 
     # Writing
     def dump_headline(self, headline, recursive=True):
-        tags = ""
+        tags = headline.space_before_tags
         if len(headline.shallow_tags) > 0:
-            tags = ":" + ":".join(headline.shallow_tags) + ":"
+            tags += ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
 
         state = ""
         if headline._state:
             state = headline._state["name"] + " "
 
         raw_title = token_list_to_raw(headline.title.contents)
-        tags_padding = ""
-        if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
-            tags_padding = " "
 
         yield (
             "*" * headline.depth
             + headline.spacing
             + state
             + raw_title
-            + tags_padding
             + tags
         )
 
-- 
2.47.2


From 14e344981bca94cb91e751dfbc39a3bd616580cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:39:08 +0200
Subject: [PATCH 4/5] format: Apply black formatter.

---
 org_rw/org_rw.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index 5c00e75..fdbd177 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -67,7 +67,9 @@ BASE_ENVIRONMENT = {
     ),
 }
 
-HEADLINE_TAGS_RE = re.compile(r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$")
+HEADLINE_TAGS_RE = re.compile(
+    r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$"
+)
 HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
 KEYWORDS_RE = re.compile(
     r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@@ -2186,11 +2188,11 @@ def parse_headline(hl, doc, parent) -> Headline:
 
     if hl_tags is None:
         tags = []
-        space_before_tags = space_after_tags = ''
+        space_before_tags = space_after_tags = ""
     else:
         tags = hl_tags.group(0).strip()[1:-1].split(":")
-        space_before_tags = hl_tags.group('space_before_tags') or ''
-        space_after_tags = hl_tags.group('space_after_tags') or ''
+        space_before_tags = hl_tags.group("space_before_tags") or ""
+        space_after_tags = hl_tags.group("space_after_tags") or ""
         line = HEADLINE_TAGS_RE.sub("", line)
 
     hl_state = None
@@ -2212,10 +2214,10 @@ def parse_headline(hl, doc, parent) -> Headline:
 
     if len(tags) == 0:
         # No tags, so title might contain trailing whitespaces, handle it
-        title_ends_with_whitespace_match = re.search(r'\s+$', title)
+        title_ends_with_whitespace_match = re.search(r"\s+$", title)
         if title_ends_with_whitespace_match is not None:
             space_before_tags = title_ends_with_whitespace_match.group(0)
-            title = title[:-len(space_before_tags)]
+            title = title[: -len(space_before_tags)]
 
     contents = parse_contents(hl["contents"])
 
@@ -2448,7 +2450,9 @@ class OrgDoc:
     def dump_headline(self, headline, recursive=True):
         tags = headline.space_before_tags
         if len(headline.shallow_tags) > 0:
-            tags += ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
+            tags += (
+                ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
+            )
 
         state = ""
         if headline._state:
@@ -2456,13 +2460,7 @@ class OrgDoc:
 
         raw_title = token_list_to_raw(headline.title.contents)
 
-        yield (
-            "*" * headline.depth
-            + headline.spacing
-            + state
-            + raw_title
-            + tags
-        )
+        yield ("*" * headline.depth + headline.spacing + state + raw_title + tags)
 
         planning = headline.get_planning_line()
         if planning is not None:
-- 
2.47.2


From 3193ecbc363fea3f4c7f0c163f67f6cc68825711 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:41:27 +0200
Subject: [PATCH 5/5] fix: Creation of new headlines.

---
 org_rw/org_rw.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index fdbd177..77644b7 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -1078,6 +1078,8 @@ class Headline:
             state="",
             tags_start=None,
             tags=[],
+            space_before_tags="",
+            space_after_tags="",
             contents=[],
             children=[],
             structural=[],
-- 
2.47.2