Proposed alternative fix for the title stripping issue #1

Open
kenkeiras wants to merge 5 commits from kenkeiras/org-rw:proposed-fix/strip-title into fix/strip-title
3 changed files with 58 additions and 16 deletions

View File

@ -67,7 +67,9 @@ BASE_ENVIRONMENT = {
), ),
} }
HEADLINE_TAGS_RE = re.compile(r"((:(\w|[0-9_@#%])+)+:)\s*$") HEADLINE_TAGS_RE = re.compile(
r"((?P<space_before_tags>\s+)(:(\w|[0-9_@#%])+)+:)(?P<space_after_tags>\s*)$"
)
HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$") HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s+)(?P<line>.*?)$")
KEYWORDS_RE = re.compile( KEYWORDS_RE = re.compile(
r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$" r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
@ -315,6 +317,8 @@ class Headline:
state, state,
tags_start, tags_start,
tags, tags,
space_before_tags,
space_after_tags,
contents, contents,
children, children,
structural, structural,
@ -340,6 +344,8 @@ class Headline:
self.title = parse_content_block([RawLine(linenum=start_line, line=title)]) self.title = parse_content_block([RawLine(linenum=start_line, line=title)])
self._state = state self._state = state
self.tags_start = tags_start self.tags_start = tags_start
self.space_before_tags = space_before_tags
self.space_after_tags = space_after_tags
self.shallow_tags = tags self.shallow_tags = tags
self.contents = contents self.contents = contents
self.children = children self.children = children
@ -1072,6 +1078,8 @@ class Headline:
state="", state="",
tags_start=None, tags_start=None,
tags=[], tags=[],
space_before_tags="",
space_after_tags="",
contents=[], contents=[],
children=[], children=[],
structural=[], structural=[],
@ -1816,7 +1824,7 @@ def token_list_to_plaintext(tok_list) -> str:
else: else:
assert isinstance(chunk, MarkerToken) assert isinstance(chunk, MarkerToken)
return "".join(contents).strip() return "".join(contents)
def token_list_to_raw(tok_list): def token_list_to_raw(tok_list):
@ -2182,8 +2190,11 @@ def parse_headline(hl, doc, parent) -> Headline:
if hl_tags is None: if hl_tags is None:
tags = [] tags = []
space_before_tags = space_after_tags = ""
else: else:
tags = hl_tags.group(0)[1:-1].split(":") tags = hl_tags.group(0).strip()[1:-1].split(":")
space_before_tags = hl_tags.group("space_before_tags") or ""
space_after_tags = hl_tags.group("space_after_tags") or ""
line = HEADLINE_TAGS_RE.sub("", line) line = HEADLINE_TAGS_RE.sub("", line)
hl_state = None hl_state = None
@ -2203,6 +2214,13 @@ def parse_headline(hl, doc, parent) -> Headline:
is_done = True is_done = True
break break
if len(tags) == 0:
# No tags, so title might contain trailing whitespaces, handle it
title_ends_with_whitespace_match = re.search(r"\s+$", title)
if title_ends_with_whitespace_match is not None:
space_before_tags = title_ends_with_whitespace_match.group(0)
title = title[: -len(space_before_tags)]
contents = parse_contents(hl["contents"]) contents = parse_contents(hl["contents"])
if not (isinstance(parent, OrgDoc) or depth > parent.depth): if not (isinstance(parent, OrgDoc) or depth > parent.depth):
@ -2229,6 +2247,8 @@ def parse_headline(hl, doc, parent) -> Headline:
priority_start=None, priority_start=None,
tags_start=None, tags_start=None,
tags=tags, tags=tags,
space_before_tags=space_before_tags,
space_after_tags=space_after_tags,
parent=parent, parent=parent,
is_todo=is_todo, is_todo=is_todo,
is_done=is_done, is_done=is_done,
@ -2430,27 +2450,19 @@ class OrgDoc:
# Writing # Writing
def dump_headline(self, headline, recursive=True): def dump_headline(self, headline, recursive=True):
tags = "" tags = headline.space_before_tags
if len(headline.shallow_tags) > 0: if len(headline.shallow_tags) > 0:
tags = ":" + ":".join(headline.shallow_tags) + ":" tags += (
":" + ":".join(headline.shallow_tags) + ":" + headline.space_after_tags
)
state = "" state = ""
if headline._state: if headline._state:
state = headline._state["name"] + " " state = headline._state["name"] + " "
raw_title = token_list_to_raw(headline.title.contents) raw_title = token_list_to_raw(headline.title.contents)
tags_padding = ""
if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
tags_padding = " "
yield ( yield ("*" * headline.depth + headline.spacing + state + raw_title + tags)
"*" * headline.depth
+ headline.spacing
+ state
+ raw_title
+ tags_padding
+ tags
)
planning = headline.get_planning_line() planning = headline.get_planning_line()
if planning is not None: if planning is not None:

12
tests/14-titles.org Normal file
View File

@ -0,0 +1,12 @@
#+TITLE: 14-Simple
#+DESCRIPTION: Org file to evaluate titles
#+TODO: TODO(t) PAUSED(p) | DONE(d)
* Simple title
* Simple title with tags :tag:
* Simple title with trailing space
* Simple title with leading space

View File

@ -955,6 +955,24 @@ class TestSerde(unittest.TestCase):
h1_2_h2 = h1_2.children[0] h1_2_h2 = h1_2.children[0]
self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"]) self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"])
def test_titles_file(self):
    """Check the title text parsed from each top-level headline of 14-titles.org.

    The fixture is expected to hold exactly four top-level headlines; the
    expected titles carry no tags and no surrounding whitespace.
    """
    fixture_path = os.path.join(DIR, "14-titles.org")
    with open(fixture_path) as handle:
        doc = load(handle)

    # Unpacking into four names also fails the test if the headline count differs.
    plain, tagged, trailing, leading = doc.getTopHeadlines()

    expectations = (
        (plain, "Simple title"),
        (tagged, "Simple title with tags"),
        (trailing, "Simple title with trailing space"),
        (leading, "Simple title with leading space"),
    )
    for headline, expected_title in expectations:
        self.assertEqual(headline.title.get_text(), expected_title)
def test_mimic_write_file_14(self):
    """A goal of this library is to be able to update a file without changing parts not directly modified.

    Round-trips 14-titles.org: the text is parsed with ``loads`` and the
    result of ``dumps`` must equal the original text exactly.
    """
    fixture_path = os.path.join(DIR, "14-titles.org")
    with open(fixture_path) as handle:
        original_text = handle.read()

    parsed = loads(original_text)
    regenerated = dumps(parsed)
    self.assertEqual(regenerated, original_text)
def test_update_headline_from_none_to_todo(self): def test_update_headline_from_none_to_todo(self):
orig = "* First entry" orig = "* First entry"
doc = loads(orig) doc = loads(orig)