From 6710775882e6649da3a2b2a6f05c150c849be516 Mon Sep 17 00:00:00 2001
From: Lyz <lyz@riseup.net>
Date: Sat, 25 Jan 2025 14:22:23 +0100
Subject: [PATCH 1/8] fix: strip token_list_to_plaintext

Otherwise, calling headline.title.get_text() may return a title with
trailing whitespace.
---
 org_rw/org_rw.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index 31b904c..4fc5da5 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -415,7 +415,6 @@ class Headline:
                     isinstance(line, DelimiterLine)
                     and line.delimiter_type == DelimiterLineType.END_BLOCK
                 ):
-
                     start = current_node.header.linenum
                     end = line.linenum
 
@@ -815,7 +814,6 @@ class Headline:
 
     def set_property(self, name: str, value: str):
         for prop in self.properties:
-
             # A matching property is found, update it
             if prop.key == name:
                 prop.value = value
@@ -1000,7 +998,6 @@ class Headline:
                             and result_first[0] == "structural"
                             and result_first[1].strip().upper() == ":RESULTS:"
                         ):
-
                             (end_line, _) = self.get_structural_end_after(
                                 kword.linenum + 1
                             )
@@ -1795,7 +1792,7 @@ def token_list_to_plaintext(tok_list) -> str:
         else:
             assert isinstance(chunk, MarkerToken)
 
-    return "".join(contents)
+    return "".join(contents).strip()
 
 
 def token_list_to_raw(tok_list):
@@ -2017,7 +2014,6 @@ def tokenize_contents(contents: str) -> List[TokenItems]:
             and is_pre(last_char)
             and ((i + 1 < len(contents)) and is_border(contents[i + 1]))
         ):
-
             is_valid_mark = False
             # Check that is closed later
             text_in_line = True
@@ -2408,7 +2404,6 @@ class OrgDoc:
 
     # Writing
     def dump_headline(self, headline, recursive=True):
-
         tags = ""
         if len(headline.shallow_tags) > 0:
             tags = ":" + ":".join(headline.shallow_tags) + ":"
@@ -2422,7 +2417,14 @@ class OrgDoc:
         if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
             tags_padding = " "
 
-        yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags
+        yield (
+            "*" * headline.depth
+            + headline.spacing
+            + state
+            + raw_title
+            + tags_padding
+            + tags
+        )
 
         planning = headline.get_planning_line()
         if planning is not None:

From 123f5c911541928c3d40f26afb1feeb5f20dcc91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:05:24 +0200
Subject: [PATCH 2/8] test: Propose tests for title parsing changes.

---
 tests/14-titles.org | 12 ++++++++++++
 tests/test_org.py   | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 tests/14-titles.org

diff --git a/tests/14-titles.org b/tests/14-titles.org
new file mode 100644
index 0000000..75b88a7
--- /dev/null
+++ b/tests/14-titles.org
@@ -0,0 +1,12 @@
+#+TITLE: 14-Simple
+#+DESCRIPTION: Org file to evaluate titles
+#+TODO: TODO(t) PAUSED(p) |  DONE(d)
+
+
+* Simple title
+
+* Simple title with tags :tag:
+
+* Simple title with trailing space 
+
+*  Simple title with leading space
diff --git a/tests/test_org.py b/tests/test_org.py
index a1fdff1..d6b4351 100644
--- a/tests/test_org.py
+++ b/tests/test_org.py
@@ -955,6 +955,24 @@ class TestSerde(unittest.TestCase):
         h1_2_h2 = h1_2.children[0]
         self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"])
 
+    def test_titles_file(self):
+        with open(os.path.join(DIR, "14-titles.org")) as f:
+            doc = load(f)
+
+        h1, h2, h3, h4 = doc.getTopHeadlines()
+        self.assertEqual(h1.title.get_text(), "Simple title")
+        self.assertEqual(h2.title.get_text(), "Simple title with tags")
+        self.assertEqual(h3.title.get_text(), "Simple title with trailing space")
+        self.assertEqual(h4.title.get_text(), "Simple title with leading space")
+
+    def test_mimic_write_file_14(self):
+        """A goal of this library is to be able to update a file without changing parts not directly modified."""
+        with open(os.path.join(DIR, "14-titles.org")) as f:
+            orig = f.read()
+            doc = loads(orig)
+
+        self.assertEqual(dumps(doc), orig)
+
     def test_update_headline_from_none_to_todo(self):
         orig = "* First entry"
         doc = loads(orig)

From 9c54f83ec7f4f868156bfc259a3e602e7d4fa083 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:06:00 +0200
Subject: [PATCH 3/8] revert: Remove old implementation change.

This is reverted because it doesn't accurately return the information that's in the org-mode file.
---
 org_rw/org_rw.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index a6ba0da..bc9657a 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -1816,7 +1816,7 @@ def token_list_to_plaintext(tok_list) -> str:
         else:
             assert isinstance(chunk, MarkerToken)
 
-    return "".join(contents).strip()
+    return "".join(contents)
 
 
 def token_list_to_raw(tok_list):

From 527a9e7eb24599b15edadf5e41b66320ccaf5e85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:37:38 +0200
Subject: [PATCH 4/8] feat: Keep headline whitespaces info & remove them from
 title text.

---
 org_rw/org_rw.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index bc9657a..5c00e75 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -67,7 +67,7 @@ BASE_ENVIRONMENT = {
     ),
 }
 
-HEADLINE_TAGS_RE = re.compile(r"((:(\w|[0-9_@#%])+)+:)\s*$")
+HEADLINE_TAGS_RE = re.compile(r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$")
 HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
 KEYWORDS_RE = re.compile(
     r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@@ -315,6 +315,8 @@ class Headline:
         state,
         tags_start,
         tags,
+        space_before_tags,
+        space_after_tags,
         contents,
         children,
         structural,
@@ -340,6 +342,8 @@ class Headline:
         self.title = parse_content_block([RawLine(linenum=start_line, line=title)])
         self._state = state
         self.tags_start = tags_start
+        self.space_before_tags = space_before_tags
+        self.space_after_tags = space_after_tags
         self.shallow_tags = tags
         self.contents = contents
         self.children = children
@@ -2182,8 +2186,11 @@ def parse_headline(hl, doc, parent) -> Headline:
 
     if hl_tags is None:
         tags = []
+        space_before_tags = space_after_tags = ''
     else:
-        tags = hl_tags.group(0)[1:-1].split(":")
+        tags = hl_tags.group(0).strip()[1:-1].split(":")
+        space_before_tags = hl_tags.group('space_before_tags') or ''
+        space_after_tags = hl_tags.group('space_after_tags') or ''
         line = HEADLINE_TAGS_RE.sub("", line)
 
     hl_state = None
@@ -2203,6 +2210,13 @@ def parse_headline(hl, doc, parent) -> Headline:
                 is_done = True
                 break
 
+    if len(tags) == 0:
+        # No tags, so title might contain trailing whitespaces, handle it
+        title_ends_with_whitespace_match = re.search(r'\s+$', title)
+        if title_ends_with_whitespace_match is not None:
+            space_before_tags = title_ends_with_whitespace_match.group(0)
+            title = title[:-len(space_before_tags)]
+
     contents = parse_contents(hl["contents"])
 
     if not (isinstance(parent, OrgDoc) or depth > parent.depth):
@@ -2229,6 +2243,8 @@ def parse_headline(hl, doc, parent) -> Headline:
         priority_start=None,
         tags_start=None,
         tags=tags,
+        space_before_tags=space_before_tags,
+        space_after_tags=space_after_tags,
         parent=parent,
         is_todo=is_todo,
         is_done=is_done,
@@ -2430,25 +2446,21 @@ class OrgDoc:
 
     # Writing
     def dump_headline(self, headline, recursive=True):
-        tags = ""
+        tags = headline.space_before_tags
         if len(headline.shallow_tags) > 0:
-            tags = ":" + ":".join(headline.shallow_tags) + ":"
+            tags += ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
 
         state = ""
         if headline._state:
             state = headline._state["name"] + " "
 
         raw_title = token_list_to_raw(headline.title.contents)
-        tags_padding = ""
-        if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
-            tags_padding = " "
 
         yield (
             "*" * headline.depth
             + headline.spacing
             + state
             + raw_title
-            + tags_padding
             + tags
         )
 

From 14e344981bca94cb91e751dfbc39a3bd616580cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:39:08 +0200
Subject: [PATCH 5/8] format: Apply black formatter.

---
 org_rw/org_rw.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index 5c00e75..fdbd177 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -67,7 +67,9 @@ BASE_ENVIRONMENT = {
     ),
 }
 
-HEADLINE_TAGS_RE = re.compile(r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$")
+HEADLINE_TAGS_RE = re.compile(
+    r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$"
+)
 HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
 KEYWORDS_RE = re.compile(
     r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@@ -2186,11 +2188,11 @@ def parse_headline(hl, doc, parent) -> Headline:
 
     if hl_tags is None:
         tags = []
-        space_before_tags = space_after_tags = ''
+        space_before_tags = space_after_tags = ""
     else:
         tags = hl_tags.group(0).strip()[1:-1].split(":")
-        space_before_tags = hl_tags.group('space_before_tags') or ''
-        space_after_tags = hl_tags.group('space_after_tags') or ''
+        space_before_tags = hl_tags.group("space_before_tags") or ""
+        space_after_tags = hl_tags.group("space_after_tags") or ""
         line = HEADLINE_TAGS_RE.sub("", line)
 
     hl_state = None
@@ -2212,10 +2214,10 @@ def parse_headline(hl, doc, parent) -> Headline:
 
     if len(tags) == 0:
         # No tags, so title might contain trailing whitespaces, handle it
-        title_ends_with_whitespace_match = re.search(r'\s+$', title)
+        title_ends_with_whitespace_match = re.search(r"\s+$", title)
         if title_ends_with_whitespace_match is not None:
             space_before_tags = title_ends_with_whitespace_match.group(0)
-            title = title[:-len(space_before_tags)]
+            title = title[: -len(space_before_tags)]
 
     contents = parse_contents(hl["contents"])
 
@@ -2448,7 +2450,9 @@ class OrgDoc:
     def dump_headline(self, headline, recursive=True):
         tags = headline.space_before_tags
         if len(headline.shallow_tags) > 0:
-            tags += ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
+            tags += (
+                ":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
+            )
 
         state = ""
         if headline._state:
@@ -2456,13 +2460,7 @@ class OrgDoc:
 
         raw_title = token_list_to_raw(headline.title.contents)
 
-        yield (
-            "*" * headline.depth
-            + headline.spacing
-            + state
-            + raw_title
-            + tags
-        )
+        yield ("*" * headline.depth + headline.spacing + state + raw_title + tags)
 
         planning = headline.get_planning_line()
         if planning is not None:

From 3193ecbc363fea3f4c7f0c163f67f6cc68825711 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:41:27 +0200
Subject: [PATCH 6/8] fix: Creation of new headlines.

---
 org_rw/org_rw.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index fdbd177..77644b7 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -1078,6 +1078,8 @@ class Headline:
             state="",
             tags_start=None,
             tags=[],
+            space_before_tags="",
+            space_after_tags="",
             contents=[],
             children=[],
             structural=[],

From f936bccf7f668d356bdf1cee596d94a6a2e567a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 00:46:52 +0200
Subject: [PATCH 7/8] doc: Add a small "Principles" section to README.

---
 README.org | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.org b/README.org
index 95ec98a..253c8f6 100644
--- a/README.org
+++ b/README.org
@@ -7,6 +7,10 @@ A python library to parse, modify and save Org-mode files.
 - Modify these data and write it back to disk.
 - Keep the original structure intact (indentation, spaces, format, ...).
 
+** Principles
+- Data structures should be exposed as it's read on Emacs's org-mode or when in doubt as raw as possible.
+- Data in the objects should be modificable, as a way to update the document itself. *Consider this a Object-oriented design.*
+- *Modification of the original text if there's no change is considered a bug (see [[id:7363ba38-1662-4d3c-9e83-0999824975b7][Known issues]]).*
 ** Safety mechanism
 As this library is still in early development. Running it over files might
 produce unexpected changes on them. For this reason it's heavily recommended to
@@ -21,6 +25,9 @@ Also, see [[id:76e77f7f-c9e0-4c83-ad2f-39a5a8894a83][Known issues:Structure modi
 not properly stored and can trigger this safety mechanism on a false-positive.
 
 * Known issues
+:PROPERTIES:
+:ID:       7363ba38-1662-4d3c-9e83-0999824975b7
+:END:
 ** Structure modifications
 :PROPERTIES:
 :ID:       76e77f7f-c9e0-4c83-ad2f-39a5a8894a83

From 55fc87cfdcef23eea402148c0a237976988107cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
 <sergio@codigoparallevar.com>
Date: Wed, 16 Apr 2025 01:00:09 +0200
Subject: [PATCH 8/8] Add absence of dependencies as principle.

---
 README.org       | 6 ++++--
 requirements.txt | 1 -
 2 files changed, 4 insertions(+), 3 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/README.org b/README.org
index 253c8f6..6f03720 100644
--- a/README.org
+++ b/README.org
@@ -8,9 +8,11 @@ A python library to parse, modify and save Org-mode files.
 - Keep the original structure intact (indentation, spaces, format, ...).
 
 ** Principles
-- Data structures should be exposed as it's read on Emacs's org-mode or when in doubt as raw as possible.
-- Data in the objects should be modificable, as a way to update the document itself. *Consider this a Object-oriented design.*
+- Avoid any dependency outside of Python's standard library.
+- Don't do anything outside of the scope of parsing/re-serializing Org-mode files.
 - *Modification of the original text if there's no change is considered a bug (see [[id:7363ba38-1662-4d3c-9e83-0999824975b7][Known issues]]).*
+- Data structures should be exposed as they're read in Emacs's org-mode or, when in doubt, as raw as possible.
+- Data in the objects should be modifiable as a way to update the document itself. *Consider this an object-oriented design.*
 ** Safety mechanism
 As this library is still in early development. Running it over files might
 produce unexpected changes on them. For this reason it's heavily recommended to
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 1c51c66..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-# No external requirements at this point