Compare commits

..

7 Commits

Author SHA1 Message Date
Sergio Martínez Portela
3193ecbc36 fix: Creation of new headlines.
All checks were successful
Testing / pytest (push) Successful in 18s
Testing / mypy (push) Successful in 25s
Testing / style-formatting (push) Successful in 18s
Testing / style-sorted-imports (push) Successful in 16s
Testing / stability-extra-test (push) Successful in 19s
2025-04-16 00:41:27 +02:00
Sergio Martínez Portela
14e344981b format: Apply black formatter.
Some checks failed
Testing / pytest (push) Successful in 21s
Testing / mypy (push) Failing after 28s
Testing / style-formatting (push) Successful in 23s
Testing / style-sorted-imports (push) Successful in 17s
Testing / stability-extra-test (push) Successful in 21s
2025-04-16 00:39:08 +02:00
Sergio Martínez Portela
527a9e7eb2 feat: Keep headline whitespaces info & remove them from title text.
Some checks failed
Testing / style-formatting (push) Waiting to run
Testing / style-sorted-imports (push) Waiting to run
Testing / stability-extra-test (push) Waiting to run
Testing / pytest (push) Successful in 20s
Testing / mypy (push) Has been cancelled
2025-04-16 00:37:38 +02:00
Sergio Martínez Portela
9c54f83ec7 revert: Remove old implementation change.
Some checks failed
Testing / pytest (push) Failing after 20s
Testing / mypy (push) Successful in 35s
Testing / style-formatting (push) Successful in 18s
Testing / style-sorted-imports (push) Successful in 16s
Testing / stability-extra-test (push) Successful in 19s
This is reverted as it doesn't accurately return the information that's in the org-mode file.
2025-04-16 00:06:13 +02:00
Sergio Martínez Portela
123f5c9115 test: Propose tests for title parsing changes. 2025-04-16 00:05:24 +02:00
ae1aa7bf9c Merge branch 'develop' into fix/strip-title 2025-04-15 21:57:53 +00:00
Lyz
6710775882
fix: strip token_list_to_plaintext
otherwise when you do headline.title.get_text() you may have trailing
whitespaces
2025-01-25 14:22:23 +01:00
5 changed files with 58 additions and 22 deletions

View File

@ -7,12 +7,6 @@ A python library to parse, modify and save Org-mode files.
- Modify these data and write it back to disk. - Modify these data and write it back to disk.
- Keep the original structure intact (indentation, spaces, format, ...). - Keep the original structure intact (indentation, spaces, format, ...).
** Principles
- Avoid any dependency outside of Python's standard library.
- Don't do anything outside of the scope of parsing/re-serializing Org-mode files.
- *Modification of the original text if there's no change is considered a bug (see [[id:7363ba38-1662-4d3c-9e83-0999824975b7][Known issues]]).*
- Data structures should be exposed as they are read in Emacs's org-mode or, when in doubt, as raw as possible.
- Data in the objects should be modifiable as a way to update the document itself. *Consider this an object-oriented design.*
** Safety mechanism ** Safety mechanism
As this library is still in early development, running it over files might As this library is still in early development, running it over files might
produce unexpected changes on them. For this reason it's heavily recommended to produce unexpected changes on them. For this reason it's heavily recommended to
@ -27,9 +21,6 @@ Also, see [[id:76e77f7f-c9e0-4c83-ad2f-39a5a8894a83][Known issues:Structure modi
not properly stored and can trigger this safety mechanism on a false-positive. not properly stored and can trigger this safety mechanism on a false-positive.
* Known issues * Known issues
:PROPERTIES:
:ID: 7363ba38-1662-4d3c-9e83-0999824975b7
:END:
** Structure modifications ** Structure modifications
:PROPERTIES: :PROPERTIES:
:ID: 76e77f7f-c9e0-4c83-ad2f-39a5a8894a83 :ID: 76e77f7f-c9e0-4c83-ad2f-39a5a8894a83

View File

@ -67,7 +67,9 @@ BASE_ENVIRONMENT = {
), ),
} }
HEADLINE_TAGS_RE = re.compile(r"((:(\w|[0-9_@#%])+)+:)\s*$") HEADLINE_TAGS_RE = re.compile(
r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$"
)
HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$") HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
KEYWORDS_RE = re.compile( KEYWORDS_RE = re.compile(
r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$" r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@ -315,6 +317,8 @@ class Headline:
state, state,
tags_start, tags_start,
tags, tags,
space_before_tags,
space_after_tags,
contents, contents,
children, children,
structural, structural,
@ -340,6 +344,8 @@ class Headline:
self.title = parse_content_block([RawLine(linenum=start_line, line=title)]) self.title = parse_content_block([RawLine(linenum=start_line, line=title)])
self._state = state self._state = state
self.tags_start = tags_start self.tags_start = tags_start
self.space_before_tags = space_before_tags
self.space_after_tags = space_after_tags
self.shallow_tags = tags self.shallow_tags = tags
self.contents = contents self.contents = contents
self.children = children self.children = children
@ -417,7 +423,6 @@ class Headline:
and line.delimiter_type == DelimiterLineType.END_BLOCK and line.delimiter_type == DelimiterLineType.END_BLOCK
and line.type_data.subtype == current_node.header.type_data.subtype and line.type_data.subtype == current_node.header.type_data.subtype
): ):
start = current_node.header.linenum start = current_node.header.linenum
end = line.linenum end = line.linenum
@ -824,7 +829,6 @@ class Headline:
def set_property(self, name: str, value: str): def set_property(self, name: str, value: str):
for prop in self.properties: for prop in self.properties:
# A matching property is found, update it # A matching property is found, update it
if prop.key == name: if prop.key == name:
prop.value = value prop.value = value
@ -1024,7 +1028,6 @@ class Headline:
and result_first[0] == "structural" and result_first[0] == "structural"
and result_first[1].strip().upper() == ":RESULTS:" and result_first[1].strip().upper() == ":RESULTS:"
): ):
(end_line, _) = self.get_structural_end_after( (end_line, _) = self.get_structural_end_after(
kword.linenum + 1 kword.linenum + 1
) )
@ -1075,6 +1078,8 @@ class Headline:
state="", state="",
tags_start=None, tags_start=None,
tags=[], tags=[],
space_before_tags="",
space_after_tags="",
contents=[], contents=[],
children=[], children=[],
structural=[], structural=[],
@ -2041,7 +2046,6 @@ def tokenize_contents(contents: str) -> List[TokenItems]:
and is_pre(last_char) and is_pre(last_char)
and ((i + 1 < len(contents)) and is_border(contents[i + 1])) and ((i + 1 < len(contents)) and is_border(contents[i + 1]))
): ):
is_valid_mark = False is_valid_mark = False
# Check that is closed later # Check that is closed later
text_in_line = True text_in_line = True
@ -2186,8 +2190,11 @@ def parse_headline(hl, doc, parent) -> Headline:
if hl_tags is None: if hl_tags is None:
tags = [] tags = []
space_before_tags = space_after_tags = ""
else: else:
tags = hl_tags.group(0)[1:-1].split(":") tags = hl_tags.group(0).strip()[1:-1].split(":")
space_before_tags = hl_tags.group("space_before_tags") or ""
space_after_tags = hl_tags.group("space_after_tags") or ""
line = HEADLINE_TAGS_RE.sub("", line) line = HEADLINE_TAGS_RE.sub("", line)
hl_state = None hl_state = None
@ -2207,6 +2214,13 @@ def parse_headline(hl, doc, parent) -> Headline:
is_done = True is_done = True
break break
if len(tags) == 0:
# No tags, so title might contain trailing whitespaces, handle it
title_ends_with_whitespace_match = re.search(r"\s+$", title)
if title_ends_with_whitespace_match is not None:
space_before_tags = title_ends_with_whitespace_match.group(0)
title = title[: -len(space_before_tags)]
contents = parse_contents(hl["contents"]) contents = parse_contents(hl["contents"])
if not (isinstance(parent, OrgDoc) or depth > parent.depth): if not (isinstance(parent, OrgDoc) or depth > parent.depth):
@ -2233,6 +2247,8 @@ def parse_headline(hl, doc, parent) -> Headline:
priority_start=None, priority_start=None,
tags_start=None, tags_start=None,
tags=tags, tags=tags,
space_before_tags=space_before_tags,
space_after_tags=space_after_tags,
parent=parent, parent=parent,
is_todo=is_todo, is_todo=is_todo,
is_done=is_done, is_done=is_done,
@ -2434,21 +2450,19 @@ class OrgDoc:
# Writing # Writing
def dump_headline(self, headline, recursive=True): def dump_headline(self, headline, recursive=True):
tags = headline.space_before_tags
tags = ""
if len(headline.shallow_tags) > 0: if len(headline.shallow_tags) > 0:
tags = ":" + ":".join(headline.shallow_tags) + ":" tags += (
":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
)
state = "" state = ""
if headline._state: if headline._state:
state = headline._state["name"] + " " state = headline._state["name"] + " "
raw_title = token_list_to_raw(headline.title.contents) raw_title = token_list_to_raw(headline.title.contents)
tags_padding = ""
if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
tags_padding = " "
yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags yield ("*" * headline.depth + headline.spacing + state + raw_title + tags)
planning = headline.get_planning_line() planning = headline.get_planning_line()
if planning is not None: if planning is not None:

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
# No external requirements at this point

12
tests/14-titles.org Normal file
View File

@ -0,0 +1,12 @@
#+TITLE: 14-Simple
#+DESCRIPTION: Org file to evaluate titles
#+TODO: TODO(t) PAUSED(p) | DONE(d)
* Simple title
* Simple title with tags :tag:
* Simple title with trailing space
* Simple title with leading space

View File

@ -955,6 +955,24 @@ class TestSerde(unittest.TestCase):
h1_2_h2 = h1_2.children[0] h1_2_h2 = h1_2.children[0]
self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"]) self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"])
def test_titles_file(self):
    """Check the plain-text title of each top-level headline in 14-titles.org."""
    with open(os.path.join(DIR, "14-titles.org")) as f:
        doc = load(f)

    # Unpacking enforces that the fixture has exactly four top-level headlines.
    first, second, third, fourth = doc.getTopHeadlines()

    expectations = (
        (first, "Simple title"),
        (second, "Simple title with tags"),
        (third, "Simple title with trailing space"),
        (fourth, "Simple title with leading space"),
    )
    for headline, expected_title in expectations:
        self.assertEqual(headline.title.get_text(), expected_title)
def test_mimic_write_file_14(self):
    """Loading and then dumping 14-titles.org must reproduce its text exactly.

    A goal of this library is to be able to update a file without
    changing the parts that were not directly modified.
    """
    fixture_path = os.path.join(DIR, "14-titles.org")
    with open(fixture_path) as f:
        original_text = f.read()

    round_tripped = dumps(loads(original_text))
    self.assertEqual(round_tripped, original_text)
def test_update_headline_from_none_to_todo(self): def test_update_headline_from_none_to_todo(self):
orig = "* First entry" orig = "* First entry"
doc = loads(orig) doc = loads(orig)