Add support for headline tags and state.

Also, keep track of the origin file of a given OrgDoc.
Sergio Martínez Portela 2020-12-22 10:21:41 +01:00
parent bb24f9495e
commit f1be0c3a72

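
A rough usage sketch of what this commit enables (illustrative only: the import name org_rw and the file paths are assumptions, since the changed file's path is not shown in this view, and state detection as of this commit relies on the document declaring its keywords with a "#+TODO:" line):

    import org_rw  # assumed import name for this parser module

    with open("todo.org") as f:   # any Org file that declares "#+TODO: ..." keywords
        doc = org_rw.load(f)      # load() now records the file's absolute path

    print(doc.path)               # e.g. /home/user/todo.org

    for hl in doc.headlines:
        # state, is_todo, is_done and tags are the fields added by this commit
        print(hl.state, hl.is_todo, hl.is_done, hl.title, hl.tags)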

@@ -1,6 +1,7 @@
 import collections
 import difflib
 import logging
+import os
 import re
 import sys
 from enum import Enum
@@ -41,8 +42,8 @@ BASE_ENVIRONMENT = {
     ),
 }
 
-HEADLINE_RE = re.compile(r"^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$")
+HEADLINE_TAGS_RE = re.compile(r"((:[a-zA-Z0-9_@#%]+)+:)")
+HEADLINE_RE = re.compile(r"^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*?)$")
 KEYWORDS_RE = re.compile(
     r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
 )
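
For reference, a standalone check of what the new HEADLINE_TAGS_RE captures from a headline title, mirroring how parse_headline uses it further down (illustrative snippet, not part of the commit):

    import re

    HEADLINE_TAGS_RE = re.compile(r"((:[a-zA-Z0-9_@#%]+)+:)")

    line = "Buy groceries    :home:errands:"
    m = HEADLINE_TAGS_RE.search(line)
    print(m.group(0))                      # ':home:errands:'
    print(m.group(0)[1:-1].split(":"))     # ['home', 'errands']
    print(HEADLINE_TAGS_RE.sub("", line))  # 'Buy groceries    ' (title with tags stripped)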
@@ -148,12 +149,16 @@ class Headline:
         priority,
         title_start,
         title,
+        state,
         tags_start,
         tags,
         contents,
         children,
         structural,
         delimiters,
+        parent,
+        is_todo,
+        is_done,
     ):
         self.start_line = start_line
         self.depth = depth
@@ -164,12 +169,23 @@ class Headline:
         self.priority = priority
         self.title_start = title_start
         self.title = title
+        self.state = state
         self.tags_start = tags_start
-        self.tags = tags
+        self.shallow_tags = tags
         self.contents = contents
         self.children = children
         self.structural = structural
         self.delimiters = delimiters
+        self.parent = parent
+        self.is_todo = is_todo
+        self.is_done = is_done
+
+    @property
+    def tags(self):
+        if isinstance(self.parent, OrgDoc):
+            return list(self.shallow_tags)
+        else:
+            return list(self.shallow_tags) + self.parent.tags
 
     def get_links(self):
         for content in self.contents:
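
The new tags property walks up the parent chain, while shallow_tags keeps only the tags written on the headline itself. A minimal sketch, assuming the module is importable as org_rw and that child headlines end up nested under their parents as the recursive parse_headline call further down suggests:

    import org_rw

    doc = org_rw.loads(
        "#+TODO: TODO | DONE\n"
        "\n"
        "* Parent headline :project:\n"
        "** Child headline :urgent:\n"
    )

    child = doc.headlines[0].children[0]
    print(child.shallow_tags)  # ['urgent']
    print(child.tags)          # ['urgent', 'project'] -- own tags plus inherited ones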
@@ -180,6 +196,16 @@ class Headline:
             if start <= line.linenum < end:
                 yield "".join(line.contents)
 
+    def get_contents(self, format):
+        if format == "raw":
+            lines = []
+            for line in self.contents:
+                lines.append(dump_contents(line))
+
+            yield from map(lambda x: x[1], sorted(lines, key=lambda x: x[0]))
+        else:
+            raise NotImplementedError()
+
     def get_element_in_line(self, linenum):
         for line in self.contents:
            if linenum == line.linenum:
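
get_contents() only implements the "raw" format for now, yielding the stored lines ordered by line number; any other format raises NotImplementedError. A small sketch (import name assumed):

    import org_rw

    doc = org_rw.loads(
        "#+TODO: TODO | DONE\n"
        "\n"
        "* A headline\n"
        "Some body text\n"
    )
    print(list(doc.headlines[0].get_contents("raw")))  # the headline's body lines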
@@ -790,22 +816,54 @@ def parse_content_block(raw_contents: List[RawLine]):
     return Text(contents, current_line)
 
-def parse_headline(hl) -> Headline:
+def dump_contents(raw):
+    if isinstance(raw, RawLine):
+        return (raw.linenum, raw.line)
+
+    return (raw.linenum, raw.get_raw())
+
+
+def parse_headline(hl, doc, parent) -> Headline:
     stars = hl["orig"].group("stars")
     depth = len(stars)
 
     # TODO: Parse line for priority, cookies and tags
     line = hl["orig"].group("line")
+    hl_tags = HEADLINE_TAGS_RE.search(line)
+    if hl_tags is None:
+        tags = []
+    else:
+        tags = hl_tags.group(0)[1:-1].split(":")
+        line = HEADLINE_TAGS_RE.sub("", line)
+
+    hl_state = None
     title = line
+    is_done = is_todo = False
+    for state in doc.todo_keywords:
+        if title.startswith(state + " "):
+            hl_state = state
+            title = title[len(state + " ") :]
+            is_todo = True
+            break
+    else:
+        for state in doc.done_keywords:
+            if title.startswith(state + " "):
+                hl_state = state
+                title = title[len(state + " ") :]
+                is_done = True
+                break
+
     contents = parse_contents(hl["contents"])
 
-    return Headline(
+    headline = Headline(
         start_line=hl["linenum"],
         depth=depth,
         orig=hl["orig"],
         title=title,
+        state=hl_state,
         contents=contents,
-        children=[parse_headline(child) for child in hl["children"]],
+        children=None,
         keywords=hl["keywords"],
         properties=hl["properties"],
         structural=hl["structural"],
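
The for/else above only falls through to the DONE keywords when no TODO keyword matched the start of the title. A standalone rendition of that decision, with the keyword lists hard-coded for the demo:

    todo_keywords = ["TODO", "NEXT"]
    done_keywords = ["DONE"]

    title = "DONE Ship the release"
    hl_state, is_todo, is_done = None, False, False
    for state in todo_keywords:
        if title.startswith(state + " "):
            hl_state, title, is_todo = state, title[len(state) + 1:], True
            break
    else:  # no TODO keyword matched, try the DONE set
        for state in done_keywords:
            if title.startswith(state + " "):
                hl_state, title, is_done = state, title[len(state) + 1:], True
                break

    print(hl_state, is_todo, is_done, repr(title))  # DONE False True 'Ship the release'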
@@ -814,15 +872,40 @@ def parse_headline(hl) -> Headline:
         priority=None,
         priority_start=None,
         tags_start=None,
-        tags=None,
+        tags=tags,
+        parent=parent,
+        is_todo=is_todo,
+        is_done=is_done,
     )
+    headline.children = [
+        parse_headline(child, doc, headline) for child in hl["children"]
+    ]
+    return headline
 
 
 class OrgDoc:
     def __init__(self, headlines, keywords, contents):
-        self.headlines: List[Headline] = list(map(parse_headline, headlines))
+        self.todo_keywords = None
+        self.done_keywords = None
+
+        for keyword in keywords:
+            if keyword.key == "TODO":
+                todo_kws, done_kws = re.sub(r"\(.\)", "", keyword.value).split("|", 1)
+
+                self.todo_keywords = re.sub(r"\s{2,}", " ", todo_kws.strip()).split()
+                self.done_keywords = re.sub(r"\s{2,}", " ", done_kws.strip()).split()
+
         self.keywords: List[Property] = keywords
         self.contents: List[RawLine] = contents
+        self._path = None
+        self.headlines: List[Headline] = list(
+            map(lambda hl: parse_headline(hl, self, self), headlines)
+        )
+
+    @property
+    def path(self):
+        return self._path
 
     ## Querying
 
     def get_links(self):
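
For reference, the #+TODO handling above splits the keyword value on "|" after stripping fast-access keys such as "(t)"; when a document has no #+TODO keyword, todo_keywords and done_keywords stay None as of this commit. An illustrative run of that transformation:

    import re

    value = "TODO(t) NEXT(n) | DONE(d) CANCELLED(c)"  # value of a "#+TODO:" line
    todo_kws, done_kws = re.sub(r"\(.\)", "", value).split("|", 1)
    print(re.sub(r"\s{2,}", " ", todo_kws.strip()).split())  # ['TODO', 'NEXT']
    print(re.sub(r"\s{2,}", " ", done_kws.strip()).split())  # ['DONE', 'CANCELLED']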
@@ -882,12 +965,6 @@ class OrgDoc:
             ),
         )
 
-    def dump_contents(self, raw):
-        if isinstance(raw, RawLine):
-            return (raw.linenum, raw.line)
-
-        return (raw.linenum, raw.get_raw())
-
     def dump_structural(self, structural: Tuple):
         return (structural[0], structural[1])
@@ -908,7 +985,7 @@ class OrgDoc:
             lines.append((KW_T, self.dump_kw(keyword)))
 
         for content in headline.contents:
-            lines.append((CONTENT_T, self.dump_contents(content)))
+            lines.append((CONTENT_T, dump_contents(content)))
 
         for prop in headline.properties:
             lines.append((PROPERTIES_T, self.dump_property(prop)))
@@ -968,7 +1045,7 @@ class OrgDoc:
            lines.append(self.dump_kw(kw))
 
         for line in self.contents:
-            lines.append(self.dump_contents(line))
+            lines.append(dump_contents(line))
 
         yield from map(lambda x: x[1], sorted(lines, key=lambda x: x[0]))
@@ -1151,7 +1228,9 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True):
 
 
 def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
-    return loads(f.read(), environment, extra_cautious)
+    doc = loads(f.read(), environment, extra_cautious)
+    doc._path = os.path.abspath(f.name)
+    return doc
 
 
 def dumps(doc):
@@ -1159,3 +1238,15 @@ def dumps(doc):
     result = "\n".join(dump)
     # print(result)
     return result
+
+
+def dump(doc, fp):
+    it = doc.dump()
+
+    # Write first line separately
+    line = next(it)
+    fp.write(line)
+
+    # Write following ones preceded by line jump
+    for line in it:
+        fp.write("\n" + line)