diff --git a/.gitea/workflows/tests.yaml b/.gitea/workflows/tests.yaml new file mode 100644 index 0000000..a3adf0a --- /dev/null +++ b/.gitea/workflows/tests.yaml @@ -0,0 +1,53 @@ +name: Testing +# run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀 +on: [push] + +jobs: + pytest: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install --break-system-package -e . + - run: pip install --break-system-package pytest + - run: pytest + + mypy: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install --break-system-package -e . + - run: pip install --break-system-package mypy + - run: mypy org_rw --check-untyped-defs + + style-formatting: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install --break-system-package -e . + - run: pip install --break-system-package black + - run: black --check . + + style-sorted-imports: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install --break-system-package -e . + - run: pip install --break-system-package isort + - run: isort --profile black --check . + + stability-extra-test: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y git-core python3-pip + - run: pip install --break-system-package -e . 
+ - run: bash extra-tests/check_all.sh diff --git a/.gitignore b/.gitignore index 5c8ee49..2fafd0e 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,6 @@ dmypy.json # Cython debug symbols cython_debug/ + +# Files for PyPI publishing +README.md diff --git a/README.org b/README.org index 95ec98a..6f03720 100644 --- a/README.org +++ b/README.org @@ -7,6 +7,12 @@ A python library to parse, modify and save Org-mode files. - Modify these data and write it back to disk. - Keep the original structure intact (indentation, spaces, format, ...). +** Principles +- Avoid any dependency outside of Python's standard library. +- Don't do anything outside of the scope of parsing/re-serializing Org-mode files. +- *Modification of the original text if there's no change is considered a bug (see [[id:7363ba38-1662-4d3c-9e83-0999824975b7][Known issues]]).* +- Data structures should be exposed as they are read in Emacs's org-mode or, when in doubt, as raw as possible. +- Data in the objects should be modifiable as a way to update the document itself. *Consider this an object-oriented design.* ** Safety mechanism As this library is still in early development. Running it over files might produce unexpected changes on them. For this reason it's heavily recommended to @@ -21,6 +27,9 @@ Also, see [[id:76e77f7f-c9e0-4c83-ad2f-39a5a8894a83][Known issues:Structure modi not properly stored and can trigger this safety mechanism on a false-positive. 
* Known issues +:PROPERTIES: +:ID: 7363ba38-1662-4d3c-9e83-0999824975b7 +:END: ** Structure modifications :PROPERTIES: :ID: 76e77f7f-c9e0-4c83-ad2f-39a5a8894a83 diff --git a/org_rw/dom.py b/org_rw/dom.py index cb3d8fd..baf0092 100644 --- a/org_rw/dom.py +++ b/org_rw/dom.py @@ -1,3 +1,6 @@ +from typing import List, Optional, Union + + class DrawerNode: def __init__(self): self.children = [] @@ -21,6 +24,14 @@ class ResultsDrawerNode(DrawerNode): return "".format(len(self.children)) +class GenericDrawerNode(DrawerNode): + def __init__(self, drawer_name): + self.drawer_name = drawer_name + + def __repr__(self): + return "".format(self.drawer_name, len(self.children)) + + class PropertyNode: def __init__(self, key, value): self.key = key @@ -38,11 +49,12 @@ class ListGroupNode: self.children.append(child) def get_raw(self): - return '\n'.join([c.get_raw() for c in self.children]) + return "\n".join([c.get_raw() for c in self.children]) def __repr__(self): return "".format(len(self.children)) + class TableNode: def __init__(self): self.children = [] @@ -53,21 +65,30 @@ class TableNode: def __repr__(self): return "".format(len(self.children)) + class TableSeparatorRow: def __init__(self, orig=None): self.orig = orig + def get_raw(self): + return get_raw_contents(self.orig) + + class TableRow: def __init__(self, cells, orig=None): self.cells = cells self.orig = orig + def get_raw(self): + return get_raw_contents(self.orig) + + class Text: def __init__(self, content): self.content = content def get_raw(self): - return ''.join(self.content.get_raw()) + return "".join(self.content.get_raw()) class ListItem: @@ -92,7 +113,7 @@ class CodeBlock(BlockNode): def __init__(self, header, subtype, arguments): super().__init__() self.header = header - self.lines = None + self.lines: Optional[List] = None self.subtype = subtype self.arguments = arguments @@ -100,6 +121,26 @@ class CodeBlock(BlockNode): self.lines = lines def __repr__(self): - return "".format(len(self.lines)) + return 
"".format(len(self.lines or [])) + + +DomNode = Union[ + DrawerNode, + PropertyNode, + ListGroupNode, + TableNode, + TableSeparatorRow, + TableRow, + Text, + ListItem, + BlockNode, +] + +ContainerDomNode = Union[ + DrawerNode, + ListGroupNode, + TableNode, + BlockNode, +] from .utils import get_raw_contents diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 9a60199..6baadd1 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -8,14 +8,32 @@ import re import sys from datetime import date, datetime, timedelta from enum import Enum -from typing import Generator, List, Optional, Tuple, Union +from typing import ( + Dict, + Iterator, + List, + Literal, + Optional, + TextIO, + Tuple, + TypedDict, + Union, + cast, +) from . import dom +from .types import HeadlineDict DEBUG_DIFF_CONTEXT = 10 +DEFAULT_TODO_KEYWORDS = ["TODO"] +DEFAULT_DONE_KEYWORDS = ["DONE"] + BASE_ENVIRONMENT = { "org-footnote-section": "Footnotes", + "org-todo-keywords": " ".join(DEFAULT_TODO_KEYWORDS) + + " | " + + " ".join(DEFAULT_DONE_KEYWORDS), "org-options-keywords": ( "ARCHIVE:", "AUTHOR:", @@ -49,9 +67,6 @@ BASE_ENVIRONMENT = { ), } -DEFAULT_TODO_KEYWORDS = ["TODO"] -DEFAULT_DONE_KEYWORDS = ["DONE"] - HEADLINE_TAGS_RE = re.compile(r"((:(\w|[0-9_@#%])+)+:)\s*$") HEADLINE_RE = re.compile(r"^(?P\*+)(?P\s+)(?P.*?)$") KEYWORDS_RE = re.compile( @@ -88,25 +103,47 @@ PLANNING_RE = re.compile( r")+\s*" ) LIST_ITEM_RE = re.compile( - r"(?P\s*)((?P[*\-+])|((?P\d|[a-zA-Z])(?P[.)]))) ((?P\s*)\[(?P[ Xx])\])?((?P\s*)(?P.*?)::)?(?P.*)" + r"(?P\s*)((?P[*\-+])|((?P\d|[a-zA-Z])(?P[.)]))) ((?P\s*)\[(?P[ Xx])\])?((?P\s*)((?P.*?)\s::))?(?P.*)" ) +IMPLICIT_LINK_RE = re.compile(r"(https?:[^<> ]*[a-zA-Z0-9])") + # Org-Babel BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P[^ ]+)(?P.*)$", re.I) END_BLOCK_RE = re.compile(r"^\s*#\+END_(?P[^ ]+)\s*$", re.I) RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I) -CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result", "arguments")) 
+CodeSnippet = collections.namedtuple( + "CodeSnippet", ("name", "content", "result", "language", "arguments") +) # Groupings -NON_FINISHED_GROUPS = (type(None), dom.ListGroupNode, dom.ResultsDrawerNode, dom.PropertyDrawerNode) +NON_FINISHED_GROUPS = ( + type(None), + dom.ListGroupNode, + dom.ResultsDrawerNode, + dom.PropertyDrawerNode, + dom.GenericDrawerNode, +) FREE_GROUPS = (dom.CodeBlock,) +# States +class HeadlineState(TypedDict): + # To be extended to handle keyboard shortcuts + name: str + + +class OrgDocDeclaredStates(TypedDict): + not_completed: List[HeadlineState] + completed: List[HeadlineState] + + class NonReproducibleDocument(Exception): """ Exception thrown when a document would be saved as different contents from what it's loaded from. """ + pass @@ -154,31 +191,30 @@ class RangeInRaw: contents.insert(start_idx + i + 1, element) -def unescape_block_lines(lines: str) -> str: +def unescape_block_lines(block: str) -> str: """ Remove leading ',' from block_lines if they escape `*` characters. 
""" i = 0 - lines = lines.split('\n') + lines = block.split("\n") while i < len(lines): line = lines[i] - if (line.lstrip(' ').startswith(',') - and line.lstrip(' ,').startswith('*') - ): + if line.lstrip(" ").startswith(",") and line.lstrip(" ,").startswith("*"): # Remove leading ',' - lead_pos = line.index(',') - line = line[:lead_pos] + line[lead_pos + 1:] + lead_pos = line.index(",") + line = line[:lead_pos] + line[lead_pos + 1 :] lines[i] = line i += 1 - return '\n'.join(lines) + return "\n".join(lines) + def get_links_from_content(content): in_link = False in_description = False - link_value = [] - link_description = [] + link_value: List[str] = [] + link_description: List[str] = [] for i, tok in enumerate(get_tokens(content)): if isinstance(tok, LinkToken): @@ -203,6 +239,11 @@ def get_links_from_content(content): link_description.append(tok) else: link_value.append(tok) + elif isinstance(tok, str): + implicit_links = IMPLICIT_LINK_RE.findall(tok) + for link in implicit_links: + yield Link(cast(str, link), cast(str, link), None) + def text_to_dom(tokens, item): if tokens is None: @@ -210,8 +251,8 @@ def text_to_dom(tokens, item): in_link = False in_description = False - link_value = [] - link_description = [] + link_value: List[str] = [] + link_description: List[str] = [] contents = [] @@ -224,11 +265,13 @@ def text_to_dom(tokens, item): in_description = True elif tok.tok_type == LinkTokenType.CLOSE: rng = RangeInRaw(item, open_link_token, tok) - contents.append(Link( - "".join(link_value), - "".join(link_description) if in_description else None, - rng, - )) + contents.append( + Link( + "".join(link_value), + "".join(link_description) if in_description else None, + rng, + ) + ) in_link = False in_description = False link_value = [] @@ -243,6 +286,7 @@ def text_to_dom(tokens, item): return contents + def get_line(item): if isinstance(item, Text): return item.linenum @@ -278,9 +322,12 @@ class Headline: list_items, table_rows, parent, - is_todo, - is_done, + 
is_todo: bool, + is_done: bool, spacing, + scheduled: Optional[Time] = None, + deadline: Optional[Time] = None, + closed: Optional[Time] = None, ): self.start_line = start_line self.depth = depth @@ -290,10 +337,8 @@ class Headline: self.priority_start = priority_start self.priority = priority self.title_start = title_start - self.title = parse_content_block( - [RawLine(linenum=start_line, line=title)] - ) - self.state = state + self.title = parse_content_block([RawLine(linenum=start_line, line=title)]) + self._state = state self.tags_start = tags_start self.shallow_tags = tags self.contents = contents @@ -305,9 +350,9 @@ class Headline: self.parent = parent self.is_todo = is_todo self.is_done = is_done - self.scheduled = None - self.deadline = None - self.closed = None + self.scheduled = scheduled + self.deadline = deadline + self.closed = closed self.spacing = spacing # Read planning line @@ -332,12 +377,12 @@ class Headline: ) ] - if scheduled := m.group("scheduled"): - self.scheduled = parse_time(scheduled) - if closed := m.group("closed"): - self.closed = parse_time(closed) - if deadline := m.group("deadline"): - self.deadline = parse_time(deadline) + if scheduled_m := m.group("scheduled"): + self.scheduled = parse_time(scheduled_m) + if closed_m := m.group("closed"): + self.closed = parse_time(closed_m) + if deadline_m := m.group("deadline"): + self.deadline = parse_time(deadline_m) # Remove from contents self._remove_element_in_line(start_line + 1) @@ -349,7 +394,6 @@ class Headline: par = par.parent return par - def as_dom(self): everything = ( self.keywords @@ -361,15 +405,17 @@ class Headline: + self.delimiters ) - tree = [] - current_node = None - indentation_tree = [] + tree: List[dom.DomNode] = [] + current_node: Optional[dom.DomNode] = None + indentation_tree: List[dom.ContainerDomNode] = [] + contents: Optional[str] = None for line in sorted(everything, key=get_line): if isinstance(current_node, dom.CodeBlock): if ( isinstance(line, DelimiterLine) 
and line.delimiter_type == DelimiterLineType.END_BLOCK + and line.type_data.subtype == current_node.header.type_data.subtype ): start = current_node.header.linenum @@ -386,7 +432,7 @@ class Headline: tree.append(current_node) current_node = None else: - pass # Ignore + pass # Ignore elif isinstance(line, Property): if type(current_node) in NON_FINISHED_GROUPS: @@ -398,33 +444,36 @@ class Headline: elif isinstance(line, Text): tree_up = list(indentation_tree) while len(tree_up) > 0: - node = tree_up[-1] - if (isinstance(node, dom.BlockNode) - or isinstance(node, dom.DrawerNode) + node: dom.DomNode = tree_up[-1] + if isinstance(node, dom.BlockNode) or isinstance( + node, dom.DrawerNode ): node.append(dom.Text(line)) current_node = node - contents = [] + contents = None break - elif ((not isinstance(node, dom.TableNode)) and - (type(node) not in NON_FINISHED_GROUPS) + elif (not isinstance(node, dom.TableNode)) and ( + type(node) not in NON_FINISHED_GROUPS ): - raise NotImplementedError('Not implemented node type: {} (headline_id={}, line={}, doc={})'.format( - node, - self.id, - line.linenum, - self.doc.path, - )) + raise NotImplementedError( + "Not implemented node type: {} (headline_id={}, line={}, doc={})".format( + node, + self.id, + line.linenum, + self.doc.path, + ) + ) else: tree_up.pop(-1) else: current_node = None - contents = [] + contents = None tree.append(dom.Text(text_to_dom(line.contents, line))) indentation_tree = tree_up elif isinstance(line, ListItem): - if (current_node is None + if ( + current_node is None or isinstance(current_node, dom.TableNode) or isinstance(current_node, dom.BlockNode) or isinstance(current_node, dom.DrawerNode) @@ -438,7 +487,14 @@ class Headline: indentation_tree.append(current_node) if not isinstance(current_node, dom.ListGroupNode): if not isinstance(current_node, dom.ListGroupNode): - raise Exception("Expected a {}, found: {} on line {} on {}".format(dom.ListGroupNode, current_node, line.linenum, self.doc.path)) + raise 
Exception( + "Expected a {}, found: {} on line {} on {}".format( + dom.ListGroupNode, + current_node, + line.linenum, + self.doc.path, + ) + ) # This can happen. Frequently inside a LogDrawer if len(indentation_tree) > 0 and ( @@ -464,10 +520,9 @@ class Headline: if isinstance(c, dom.ListItem) ] - if (len(list_children) == 0): + if len(list_children) == 0: break - if ((len(list_children[-1].orig.indentation) - <= len(line.indentation))): + if len(list_children[-1].orig.indentation) <= len(line.indentation): # No more breaking out of lists, it's indentation # is less than ours break @@ -480,7 +535,11 @@ class Headline: else: current_node = indentation_tree[-1] - node = dom.ListItem(text_to_dom(line.tag, line), text_to_dom(line.content, line), orig=line) + node = dom.ListItem( + text_to_dom(line.tag, line), + text_to_dom(line.content, line), + orig=line, + ) current_node.append(node) elif isinstance(line, TableRow): @@ -497,13 +556,22 @@ class Headline: list_node.append(current_node) indentation_tree.append(current_node) else: - logging.debug("Expected a {}, found: {} on line {}".format(dom.TableNode, current_node, line.linenum)) + logging.debug( + "Expected a {}, found: {} on line {}".format( + dom.TableNode, current_node, line.linenum + ) + ) # This can happen. 
Frequently inside a LogDrawer - if len(line.cells) > 0 and len(line.cells[0]) > 0 and line.cells[0][0] == '-': + if ( + len(line.cells) > 0 + and len(line.cells[0]) > 0 + and line.cells[0][0] == "-" + ): node = dom.TableSeparatorRow(orig=line) else: node = dom.TableRow(line.cells, orig=line) + current_node = cast(dom.ContainerDomNode, current_node) current_node.append(node) elif ( @@ -511,7 +579,9 @@ class Headline: and line.delimiter_type == DelimiterLineType.BEGIN_BLOCK ): assert type(current_node) in NON_FINISHED_GROUPS - current_node = dom.CodeBlock(line, line.type_data.subtype, line.arguments) + current_node = dom.CodeBlock( + line, line.type_data.subtype, line.arguments + ) elif isinstance(line, Keyword): logging.warning("Keywords not implemented on `as_dom()`") @@ -545,7 +615,7 @@ class Headline: indentation_tree = [current_node] elif content.strip().upper() == ":END:": if current_node is None and len(indentation_tree) == 0: - logging.error('Finished node (:END:) with no known starter') + logging.error("Finished node (:END:) with no known starter") else: tree_up = list(indentation_tree) while len(tree_up) > 0: @@ -558,12 +628,23 @@ class Headline: else: tree_up.pop(-1) else: - raise Exception('Unexpected node ({}) on headline (id={}), line {}'.format(current_node, self.id, linenum)) + raise Exception( + "Unexpected node ({}) on headline (id={}), line {}".format( + current_node, self.id, linenum + ) + ) current_node = None elif content.strip().upper() == ":RESULTS:": assert current_node is None current_node = dom.ResultsDrawerNode() + # TODO: Allow indentation of these blocks inside others + indentation_tree = [current_node] + tree.append(current_node) + elif content.strip().startswith(":") and content.strip().endswith(":"): + assert current_node is None + current_node = dom.GenericDrawerNode(content.strip().strip(":")) + # TODO: Allow indentation of these blocks inside others indentation_tree = [current_node] tree.append(current_node) @@ -583,19 +664,22 @@ 
class Headline: lists.append([li]) else: num_lines = li.linenum - (last_line + 1) - lines_between = ''.join(['\n' + l - for l in self.get_lines_between(last_line + 1, li.linenum)] - ) + lines_between = "".join( + [ + "\n" + l + for l in self.get_lines_between(last_line + 1, li.linenum) + ] + ) # Only empty lines - if ((num_lines == lines_between.count('\n')) - and (len(lines_between.strip()) == 0) + if (num_lines == lines_between.count("\n")) and ( + len(lines_between.strip()) == 0 ): lists[-1].append(li) else: lists.append([li]) - last_line = li.linenum + sum(c.count('\n') for c in li.content) + last_line = li.linenum + sum(c.count("\n") for c in li.content) return lists # @DEPRECATED: use `get_lists` @@ -603,7 +687,7 @@ class Headline: return self.get_lists() def get_tables(self): - tables = [] + tables: List[List] = [] # TableRow[][] last_line = None for row in self.table_rows: @@ -651,6 +735,42 @@ class Headline: def id(self, value): self.set_property("ID", value) + @property + def state(self) -> HeadlineState: + return self._state + + @state.setter + def state(self, new_state: Union[None, str, HeadlineState]) -> None: + """ + Update the state of a Headline. If the state is a known one it will update it's TODO/DONE properties. + + Args: + new_state (str|HeadlineState): New state, either it's literal value or it's structure. + """ + if new_state is None: + self.is_todo = False + self.is_done = False + # TODO: Check & log if appropriate? + self._state = None + return + + if isinstance(new_state, str): + new_state = HeadlineState(name=new_state) + + state_name = new_state["name"] + if state_name in [kw["name"] for kw in self.doc.todo_keywords]: + self.is_todo = True + self.is_done = False + # TODO: Check & log if appropriate? + elif state_name in [kw["name"] for kw in self.doc.done_keywords]: + self.is_todo = False + self.is_done = True + # TODO: Check, log & if appropriate? 
+ else: + # TODO: Should we raise a warning, raise an exception, update the is_todo/is_done? + pass + self._state = new_state + @property def clock(self): times = [] @@ -662,6 +782,7 @@ class Headline: time_seg = content[len("CLOCK:") :].strip() + parsed: Optional[Time] = None if "--" in time_seg: # TODO: Consider duration start, end = time_seg.split("=")[0].split("--") @@ -669,16 +790,27 @@ class Headline: parsed = as_time_range else: parsed = OrgTime.parse(time_seg) - times.append(parsed) + + if parsed is not None: + times.append(parsed) return times @property - def tags(self): - if isinstance(self.parent, OrgDoc): - return list(self.shallow_tags) - else: - return list(self.shallow_tags) + self.parent.tags + def tags(self) -> list[str]: + parent_tags = self.parent.tags + if self.doc.environment.get("org-use-tag-inheritance"): + accepted_tags = [] + for tag in self.doc.environment.get("org-use-tag-inheritance"): + if tag in parent_tags: + accepted_tags.append(tag) + parent_tags = accepted_tags + + elif self.doc.environment.get("org-tags-exclude-from-inheritance"): + for tag in self.doc.environment.get("org-tags-exclude-from-inheritance"): + if tag in parent_tags: + parent_tags.remove(tag) + return list(self.shallow_tags) + parent_tags def add_tag(self, tag: str): self.shallow_tags.append(tag) @@ -737,13 +869,28 @@ class Headline: for lst in self.get_lists(): for item in lst: if item.tag: - yield from get_links_from_content(item.tag) + yield from get_links_from_content(item.tag) yield from get_links_from_content(item.content) def get_lines_between(self, start, end): - for line in self.contents: + # @TODO: Generalize for other line types too. 
+ everything = ( + [] + # + self.keywords + + self.contents + # + self.list_items + # + self.table_rows + # + self.properties + # + self.structural + + self.delimiters + ) + + for line in everything: if start <= line.linenum < end: - yield "".join(line.get_raw()) + if "get_raw" in dir(line): + yield "".join(line.get_raw()) + else: + yield line.line def get_contents(self, format): if format == "raw": @@ -759,7 +906,7 @@ class Headline: if linenum == line.linenum: return line - for (s_lnum, struc) in self.structural: + for s_lnum, struc in self.structural: if linenum == s_lnum: return ("structural", struc) @@ -785,7 +932,7 @@ class Headline: ) def get_structural_end_after(self, linenum): - for (s_lnum, struc) in self.structural: + for s_lnum, struc in self.structural: if s_lnum > linenum and struc.strip().upper() == ":END:": return (s_lnum, struc) @@ -795,12 +942,30 @@ class Headline: sections = [] arguments = None + names_by_line = {} + for kw in self.keywords: + if kw.key == "NAME": + names_by_line[kw.linenum] = kw.value + + name = None for delimiter in self.delimiters: - if delimiter.delimiter_type == DelimiterLineType.BEGIN_BLOCK and delimiter.type_data.subtype.lower() == "src": + if ( + delimiter.delimiter_type == DelimiterLineType.BEGIN_BLOCK + and delimiter.type_data.subtype.lower() == "src" + ): line_start = delimiter.linenum inside_code = True arguments = delimiter.arguments - elif delimiter.delimiter_type == DelimiterLineType.END_BLOCK and delimiter.type_data.subtype.lower() == "src": + + name_line = line_start - 1 + if name_line in names_by_line: + name = names_by_line[name_line] + else: + name = None + elif ( + delimiter.delimiter_type == DelimiterLineType.END_BLOCK + and delimiter.type_data.subtype.lower() == "src" + ): inside_code = False start, end = line_start, delimiter.linenum @@ -811,14 +976,26 @@ class Headline: # the content parsing must be re-thinked contents = contents[:-1] + language = None + if arguments is not None: + arguments = 
arguments.strip() + if " " in arguments: + language = arguments[: arguments.index(" ")] + arguments = arguments[arguments.index(" ") + 1 :] + else: + language = arguments + arguments = None sections.append( { "line_first": start + 1, "line_last": end - 1, "content": contents, "arguments": arguments, + "language": language, + "name": name, } ) + name = None arguments = None line_start = None @@ -867,11 +1044,20 @@ class Headline: results = [] for section in sections: - name = None content = section["content"] code_result = section.get("result", None) arguments = section.get("arguments", None) - results.append(CodeSnippet(name=name, content=content, result=code_result, arguments=arguments)) + language = section.get("language", None) + name = section.get("name", None) + results.append( + CodeSnippet( + content=content, + result=code_result, + arguments=arguments, + language=language, + name=name, + ) + ) return results @@ -913,13 +1099,20 @@ Property = collections.namedtuple( "Property", ("linenum", "match", "key", "value", "options") ) + class ListItem: - def __init__(self, - linenum, match, + def __init__( + self, + linenum, + match, indentation, - bullet, counter, counter_sep, - checkbox_indentation, checkbox_value, - tag_indentation, tag, + bullet, + counter, + counter_sep, + checkbox_indentation, + checkbox_value, + tag_indentation, + tag, content, ): self.linenum = linenum @@ -936,10 +1129,11 @@ class ListItem: @property def text_start_pos(self): - return len(self.indentation) + 1 # Indentation + bullet + return len(self.indentation) + 1 # Indentation + bullet def append_line(self, line): - self.content += parse_content_block('\n' + line).contents + self.content += parse_content_block("\n" + line).contents + TableRow = collections.namedtuple( "TableRow", @@ -952,26 +1146,104 @@ TableRow = collections.namedtuple( ), ) + # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ? 
# @TODO Consider recurrence annotations class Timestamp: - def __init__(self, active, year, month, day, dow, hour, minute, repetition=None): + def __init__( + self, + active: bool = True, + year: Optional[int] = None, + month: Optional[int] = None, + day: Optional[int] = None, + dow: Optional[str] = None, + hour: Optional[int] = None, + minute: Optional[int] = None, + repetition: Optional[str] = None, + datetime_: Optional[Union[date, datetime]] = None, + ): + """ + Initializes a Timestamp instance. + + Args: + active (bool): Whether the timestamp is active. + year (Optional[int]): The year of the timestamp. + month (Optional[int]): The month of the timestamp. + day (Optional[int]): The day of the timestamp. + dow (Optional[str]): The day of the week, if any. + hour (Optional[int]): The hour of the timestamp, if any. + minute (Optional[int]): The minute of the timestamp, if any. + repetition (Optional[str]): The repetition pattern, if any. + datetime_ (Optional[Union[date, datetime]]): A date or datetime object. + + Raises: + ValueError: If neither datetime_ nor the combination of year, month, and day are provided. + """ self.active = active - self._year = year - self._month = month - self._day = day - self.dow = dow - self.hour = hour - self.minute = minute + + if datetime_ is not None: + self.from_datetime(datetime_) + elif year is not None and month is not None and day is not None: + self._year = year + self._month = month + self._day = day + self.dow = dow + self.hour = hour + self.minute = minute + else: + raise ValueError( + "Either datetime_ or year, month, and day must be provided." + ) self.repetition = repetition def to_datetime(self) -> datetime: + """ + Converts the Timestamp to a datetime object. + + Returns: + datetime: The corresponding datetime object. 
+ """ if self.hour is not None: - return datetime(self.year, self.month, self.day, self.hour, self.minute) + return datetime( + self.year, self.month, self.day, self.hour, self.minute or 0 + ) else: return datetime(self.year, self.month, self.day, 0, 0) - def __add__(self, delta: timedelta): + def from_datetime(self, dt: Union[datetime, date]) -> None: + """ + Updates the current Timestamp instance based on a datetime or date object. + + Args: + dt (Union[datetime, date]): The datetime or date object to use for updating the instance. + """ + if isinstance(dt, datetime): + self._year = dt.year + self._month = dt.month + self._day = dt.day + self.hour = dt.hour + self.minute = dt.minute + elif isinstance(dt, date): + self._year = dt.year + self._month = dt.month + self._day = dt.day + self.hour = None + self.minute = None + else: + raise TypeError("Expected datetime or date object") + + self.dow = None # Day of the week can be set to None + + def __add__(self, delta: timedelta) -> "Timestamp": + """ + Adds a timedelta to the Timestamp. + + Args: + delta (timedelta): The time difference to add. + + Returns: + Timestamp: The resulting Timestamp instance. + """ as_dt = self.to_datetime() to_dt = as_dt + delta @@ -982,64 +1254,102 @@ class Timestamp: day=to_dt.day, dow=None, hour=to_dt.hour if self.hour is not None or to_dt.hour != 0 else None, - minute=to_dt.minute - if self.minute is not None or to_dt.minute != 0 - else None, + minute=( + to_dt.minute if self.minute is not None or to_dt.minute != 0 else None + ), repetition=self.repetition, ) - def __eq__(self, other): + def __eq__(self, other: object) -> bool: + """ + Checks if two Timestamp instances are equal. + + Args: + other (object): The other object to compare with. + + Returns: + bool: True if the instances are equal, False otherwise. 
+ """ if not isinstance(other, Timestamp): return False return ( - (self.active == other.active) - and (self.year == other.year) - and (self.month == other.month) - and (self.day == other.day) - and (self.dow == other.dow) - and (self.hour == other.hour) - and (self.minute == other.minute) - and (self.repetition == other.repetition) + self.active == other.active + and self.year == other.year + and self.month == other.month + and self.day == other.day + and self.dow == other.dow + and self.hour == other.hour + and self.minute == other.minute + and self.repetition == other.repetition ) - def __lt__(self, other): + def __lt__(self, other: object) -> bool: + """ + Checks if the Timestamp is less than another Timestamp. + + Args: + other (object): The other object to compare with. + + Returns: + bool: True if this Timestamp is less than the other, False otherwise. + """ if not isinstance(other, Timestamp): return False return self.to_datetime() < other.to_datetime() - def __gt__(self, other): + def __gt__(self, other: object) -> bool: + """ + Checks if the Timestamp is greater than another Timestamp. + + Args: + other (object): The other object to compare with. + + Returns: + bool: True if this Timestamp is greater than the other, False otherwise. + """ if not isinstance(other, Timestamp): return False return self.to_datetime() > other.to_datetime() - def __repr__(self): + def __repr__(self) -> str: + """ + Returns a string representation of the Timestamp. + + Returns: + str: The string representation of the Timestamp. 
+ """ return timestamp_to_string(self) - # Properties whose modification changes the Day-Of-Week @property - def year(self): + def year(self) -> int: + """Returns the year of the timestamp.""" return self._year @year.setter - def year(self, value): + def year(self, value: int) -> None: + """Sets the year of the timestamp and resets the day of the week.""" self._year = value self.dow = None @property - def month(self): + def month(self) -> int: + """Returns the month of the timestamp.""" return self._month @month.setter - def month(self, value): + def month(self, value: int) -> None: + """Sets the month of the timestamp and resets the day of the week.""" self._month = value self.dow = None @property - def day(self): + def day(self) -> int: + """Returns the day of the timestamp.""" return self._day @day.setter - def day(self, value): + def day(self, value: int) -> None: + """Sets the day of the timestamp and resets the day of the week.""" self._day = value self.dow = None @@ -1049,9 +1359,7 @@ class DelimiterLineType(Enum): END_BLOCK = 2 -BlockDelimiterTypeData = collections.namedtuple( - "BlockDelimiterTypeData", ("subtype") -) +BlockDelimiterTypeData = collections.namedtuple("BlockDelimiterTypeData", ("subtype")) DelimiterLine = collections.namedtuple( "DelimiterLine", ("linenum", "line", "delimiter_type", "type_data", "arguments") @@ -1101,76 +1409,148 @@ def token_from_type(tok_type): class TimeRange: - def __init__(self, start_time: OrgTime, end_time: OrgTime): - assert start_time is not None - assert end_time is not None + """Represents a range of time with a start and end time. + + Attributes: + start_time (OrgTime): The start time of the range. + end_time (OrgTime): The end time of the range. + """ + + def __init__(self, start_time: OrgTime, end_time: OrgTime) -> None: + """Initializes a TimeRange with a start time and an end time. + + Args: + start_time (OrgTime): The start time of the range. + end_time (OrgTime): The end time of the range. 
+ + Raises: + AssertionError: If start_time or end_time is None. + """ + if start_time is None or end_time is None: + raise ValueError("start_time and end_time must not be None.") self.start_time = start_time self.end_time = end_time def to_raw(self) -> str: + """Converts the TimeRange to its raw string representation. + + Returns: + str: The raw string representation of the TimeRange. + """ return timerange_to_string(self) @property def duration(self) -> timedelta: + """Calculates the duration of the TimeRange. + + Returns: + timedelta: The duration between start_time and end_time. + """ delta = self.end - self.start return delta @property def start(self) -> datetime: + """Gets the start time as a datetime object. + + Returns: + datetime: The start time of the TimeRange. + """ return self.start_time.time.to_datetime() @property def end(self) -> datetime: + """Gets the end time as a datetime object. + + Returns: + datetime: The end time of the TimeRange. + """ return self.end_time.time.to_datetime() + def activate(self) -> None: + """ + Sets the active state for the times. + """ + self.start_time.active = True + self.end_time.active = True -def parse_time(value: str) -> Union[None, TimeRange, OrgTime]: - if (value.count(">--<") == 1) or (value.count("]--[") == 1): - # Time ranges with two different dates - # @TODO properly consider "=> DURATION" section - start, end = value.split("=")[0].split("--") - as_time_range = parse_org_time_range(start, end) - if (as_time_range.start_time is not None) and ( - as_time_range.end_time is not None - ): - return as_time_range - else: - raise Exception("Unknown time range format: {}".format(value)) - elif as_time := OrgTime.parse(value): - return as_time - else: - return None - - -def parse_org_time_range(start, end) -> TimeRange: - return TimeRange(OrgTime.parse(start), OrgTime.parse(end)) + def deactivate(self) -> None: + """ + Sets the inactive state for the times. 
+ """ + self.start_time.active = False + self.end_time.active = False class OrgTime: - def __init__(self, ts: Timestamp, end_time: Optional[Timestamp] = None): - assert ts is not None + """Represents a point in time with optional end time and repetition. + + Attributes: + time (Timestamp): The start time of the OrgTime instance. + end_time (Optional[Timestamp]): The end time of the OrgTime instance, if any. + """ + + def __init__(self, ts: Timestamp, end_time: Optional[Timestamp] = None) -> None: + """Initializes an OrgTime with a start time and an optional end time. + + Args: + ts (Timestamp): The start time of the OrgTime instance. + end_time (Optional[Timestamp], optional): The end time of the OrgTime instance. Defaults to None. + + Raises: + ValueError: If ts is None. + """ + if ts is None: + raise ValueError("Timestamp (ts) must not be None.") self.time = ts self.end_time = end_time @property - def repetition(self): + def repetition(self) -> Optional[str]: + """Gets the repetition information from the start time. + + Returns: + Optional[str]: The repetition information, or None if not present. + """ return self.time.repetition @property - def duration(self): + def duration(self) -> timedelta: + """Calculates the duration between the start and end times. + + Returns: + timedelta: The duration between the start and end times. If no end time is present, returns zero timedelta. + """ if self.end_time is None: return timedelta() # No duration - else: - return self.end_time.to_datetime() - self.time.to_datetime() + return self.end_time.to_datetime() - self.time.to_datetime() - def to_raw(self): + def to_raw(self) -> str: + """Converts the OrgTime to its raw string representation. + + Returns: + str: The raw string representation of the OrgTime. + """ return timestamp_to_string(self.time, self.end_time) - def __repr__(self): + def __repr__(self) -> str: + """Provides a string representation of the OrgTime instance. 
+ + Returns: + str: The string representation of the OrgTime. + """ return f"OrgTime({self.to_raw()})" @classmethod - def parse(self, value: str) -> OrgTime: + def parse(cls, value: str) -> Optional["OrgTime"]: + """Parses a string into an OrgTime object. + + Args: + value (str): The string representation of the OrgTime. + + Returns: + Optional[OrgTime]: The parsed OrgTime instance, or None if parsing fails. + """ if m := ACTIVE_TIME_STAMP_RE.match(value): active = True elif m := INACTIVE_TIME_STAMP_RE.match(value): @@ -1183,7 +1563,7 @@ class OrgTime: repetition = m.group("repetition").strip() if m.group("end_hour"): - return OrgTime( + return cls( Timestamp( active, int(m.group("year")), @@ -1205,7 +1585,7 @@ class OrgTime: ), ) - return OrgTime( + return cls( Timestamp( active, int(m.group("year")), @@ -1218,8 +1598,45 @@ class OrgTime: ) ) + @property + def active(self) -> bool: + """ + Checks if the time is set as active. + """ + return self.time.active -def time_from_str(s: str) -> OrgTime: + @active.setter + def active(self, value: bool) -> None: + """ + Sets the active state for the timestamp. + """ + self.time.active = value + + def activate(self) -> None: + """ + Sets the active state for the timestamp. + """ + self.active = True + + def deactivate(self) -> None: + """ + Sets the inactive state for the timestamp. + """ + self.active = False + + def from_datetime(self, dt: datetime) -> None: + """ + Updates the timestamp to use the given datetime. + + Args: + dt (datetime): The datetime to update the timestamp with. 
+ """ + self.time.from_datetime(dt) + if self.end_time: + self.end_time.from_datetime(dt) + + +def time_from_str(s: str) -> Optional[OrgTime]: return OrgTime.parse(s) @@ -1236,7 +1653,7 @@ def timestamp_to_string(ts: Timestamp, end_time: Optional[Timestamp] = None) -> if ts.hour is not None: base = "{date} {hour:02}:{minute:02d}".format( - date=date, hour=ts.hour, minute=ts.minute + date=date, hour=ts.hour, minute=ts.minute or 0 ) else: base = date @@ -1257,6 +1674,39 @@ def timestamp_to_string(ts: Timestamp, end_time: Optional[Timestamp] = None) -> return "[{}]".format(base) +Time = Union[TimeRange, OrgTime] + + +def parse_time(value: str) -> Optional[Time]: + if (value.count(">--<") == 1) or (value.count("]--[") == 1): + # Time ranges with two different dates + # @TODO properly consider "=> DURATION" section + start, end = value.split("=")[0].split("--") + as_time_range = parse_org_time_range(start, end) + if as_time_range is None: + return None + + if (as_time_range.start_time is not None) and ( + as_time_range.end_time is not None + ): + return as_time_range + else: + raise Exception("Unknown time range format: {}".format(value)) + elif as_time := OrgTime.parse(value): + return as_time + else: + return None + + +def parse_org_time_range(start, end) -> Optional[TimeRange]: + start_time = OrgTime.parse(start) + end_time = OrgTime.parse(end) + + if start_time is None or end_time is None: + return None + return TimeRange(start_time, end_time) + + def get_raw(doc): if isinstance(doc, str): return doc @@ -1280,7 +1730,9 @@ class Line: class Link: - def __init__(self, value: str, description: str, origin: RangeInRaw): + def __init__( + self, value: str, description: Optional[str], origin: Optional[RangeInRaw] + ): self._value = value self._description = description self._origin = origin @@ -1292,12 +1744,13 @@ class Link: return "[[{}]]".format(self.value) def _update_content(self): - new_contents = [] + new_contents: List[Union[str, LinkToken]] = [] 
new_contents.append(self._value) if self._description: new_contents.append(LinkToken(LinkTokenType.OPEN_DESCRIPTION)) new_contents.append(self._description) - self._origin.update_range(new_contents) + if self._origin is not None: + self._origin.update_range(new_contents) @property def value(self): @@ -1326,12 +1779,13 @@ class Text: def __repr__(self): return "{{Text line: {}; content: {} }}".format(self.linenum, self.contents) - def get_text(self): + def get_text(self) -> str: return token_list_to_plaintext(self.contents) def get_raw(self): return token_list_to_raw(self.contents) + def token_list_to_plaintext(tok_list) -> str: contents = [] in_link = False @@ -1356,7 +1810,7 @@ def token_list_to_plaintext(tok_list) -> str: if not in_description: # This might happen when link doesn't have a separate description link_description = link_url - contents.append(''.join(link_description)) + contents.append("".join(link_description)) in_link = False in_description = False @@ -1367,6 +1821,7 @@ def token_list_to_plaintext(tok_list) -> str: return "".join(contents) + def token_list_to_raw(tok_list): contents = [] for chunk in tok_list: @@ -1452,7 +1907,7 @@ class Verbatim: return f"{self.Marker}{raw}{self.Marker}" -def is_pre(char: str) -> bool: +def is_pre(char: Optional[str]) -> bool: if isinstance(char, str): return char in "\n\r\t -({'\"" else: @@ -1494,12 +1949,14 @@ TOKEN_TYPE_OPEN_LINK = 3 TOKEN_TYPE_CLOSE_LINK = 4 TOKEN_TYPE_OPEN_DESCRIPTION = 5 +TokenItems = Union[Tuple[int, Union[None, str, MarkerToken]],] -def tokenize_contents(contents: str): - tokens = [] + +def tokenize_contents(contents: str) -> List[TokenItems]: + tokens: List[TokenItems] = [] last_char = None - text = [] + text: List[str] = [] closes = set() in_link = False in_link_description = False @@ -1552,7 +2009,12 @@ def tokenize_contents(contents: str): continue # Possible link close or open of description - if char == "]" and len(contents) > i + 1 and in_link: + if ( + char == "]" + and 
len(contents) > i + 1 + and in_link + and contents[i + 1] in "][" + ): if contents[i + 1] == "]": cut_string() @@ -1603,6 +2065,7 @@ def tokenize_contents(contents: str): cut_string() tokens.append((TOKEN_TYPE_CLOSE_MARKER, char)) has_changed = True + closes.remove(i) if not has_changed: text.append(char) @@ -1619,7 +2082,7 @@ def parse_contents(raw_contents: List[RawLine]): return [] blocks = [] - current_block = [] + current_block: List[RawLine] = [] for line in raw_contents: if len(current_block) == 0: @@ -1627,6 +2090,7 @@ def parse_contents(raw_contents: List[RawLine]): current_line = line.linenum current_block.append(line) else: + current_line = cast(int, current_line) if line.linenum == current_line + 1: # Continue with the current block current_line = line.linenum @@ -1644,7 +2108,7 @@ def parse_contents(raw_contents: List[RawLine]): return [parse_content_block(block) for block in blocks] -def parse_content_block(raw_contents: Union[List[RawLine],str]): +def parse_content_block(raw_contents: Union[List[RawLine], str]) -> Text: contents_buff = [] if isinstance(raw_contents, str): contents_buff.append(raw_contents) @@ -1652,21 +2116,24 @@ def parse_content_block(raw_contents: Union[List[RawLine],str]): for line in raw_contents: contents_buff.append(line.line) - contents = "\n".join(contents_buff) - tokens = tokenize_contents(contents) + contents_buff_text = "\n".join(contents_buff) + tokens = tokenize_contents(contents_buff_text) if isinstance(raw_contents, str): current_line = None else: current_line = raw_contents[0].linenum - contents = [] + contents: List[Union[str, MarkerToken, LinkToken]] = [] # Use tokens to tag chunks of text with it's container type - for (tok_type, tok_val) in tokens: + for tok_type, tok_val in tokens: if tok_type == TOKEN_TYPE_TEXT: + assert isinstance(tok_val, str) contents.append(tok_val) elif tok_type == TOKEN_TYPE_OPEN_MARKER: + assert isinstance(tok_val, str) contents.append(MarkerToken(False, MARKERS[tok_val])) elif tok_type 
== TOKEN_TYPE_CLOSE_MARKER: + assert isinstance(tok_val, str) contents.append(MarkerToken(True, MARKERS[tok_val])) elif tok_type == TOKEN_TYPE_OPEN_LINK: contents.append(LinkToken(LinkTokenType.OPEN_LINK)) @@ -1685,17 +2152,21 @@ def dump_contents(raw): elif isinstance(raw, ListItem): bullet = raw.bullet if raw.bullet else raw.counter + raw.counter_sep content_full = token_list_to_raw(raw.content) - content_lines = content_full.split('\n') - content = '\n'.join(content_lines) + content_lines = content_full.split("\n") + content = "\n".join(content_lines) checkbox = f"[{raw.checkbox_value}]" if raw.checkbox_value else "" - tag = f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')}::" if raw.tag or raw.tag_indentation else "" + tag = ( + f"{raw.tag_indentation}{token_list_to_raw(raw.tag or '')} ::" + if raw.tag or raw.tag_indentation + else "" + ) return ( raw.linenum, f"{raw.indentation}{bullet} {checkbox}{tag}{content}", ) elif isinstance(raw, TableRow): - closed = '|' if raw.last_cell_closed else '' + closed = "|" if raw.last_cell_closed else "" return ( raw.linenum, f"{' ' * raw.indentation}|{'|'.join(raw.cells)}{closed}{raw.suffix}", @@ -1723,23 +2194,25 @@ def parse_headline(hl, doc, parent) -> Headline: title = line is_done = is_todo = False for state in doc.todo_keywords or []: - if title.startswith(state + " "): + if title.startswith(state["name"] + " "): hl_state = state - title = title[len(state + " ") :] + title = title[len(state["name"] + " ") :] is_todo = True break else: for state in doc.done_keywords or []: - if title.startswith(state + " "): + if title.startswith(state["name"] + " "): hl_state = state - title = title[len(state + " ") :] + title = title[len(state["name"] + " ") :] is_done = True break contents = parse_contents(hl["contents"]) if not (isinstance(parent, OrgDoc) or depth > parent.depth): - raise AssertionError("Incorrectly parsed parent on `{}' > `{}'".format(parent.title, title)) + raise AssertionError( + "Incorrectly parsed 
parent on `{}' > `{}'".format(parent.title, title) + ) headline = Headline( start_line=hl["linenum"], @@ -1827,25 +2300,68 @@ def dump_delimiters(line: DelimiterLine): return (line.linenum, line.line) +def parse_todo_done_keywords(line: str) -> OrgDocDeclaredStates: + clean_line = re.sub(r"\([^)]+\)", "", line) + if "|" in clean_line: + todo_kws, done_kws = clean_line.split("|", 1) + has_split = True + else: + # Standard behavior in this case is: the last state is the one considered as DONE + todo_kws = clean_line + + todo_keywords = re.sub(r"\s{2,}", " ", todo_kws.strip()).split() + if has_split: + done_keywords = re.sub(r"\s{2,}", " ", done_kws.strip()).split() + else: + done_keywods = [todo_keywords[-1]] + todo_keywords = todo_keywords[:-1] + + return { + "not_completed": [HeadlineState(name=keyword) for keyword in todo_keywords], + "completed": [HeadlineState(name=keyword) for keyword in done_keywords], + } + + class OrgDoc: def __init__( - self, headlines, keywords, contents, list_items, structural, properties + self, + headlines, + keywords, + contents, + list_items, + structural, + properties, + delimiters, + environment=BASE_ENVIRONMENT, ): - self.todo_keywords = DEFAULT_TODO_KEYWORDS - self.done_keywords = DEFAULT_DONE_KEYWORDS + self.todo_keywords = [HeadlineState(name=kw) for kw in DEFAULT_TODO_KEYWORDS] + self.done_keywords = [HeadlineState(name=kw) for kw in DEFAULT_DONE_KEYWORDS] + self.environment = environment + keywords_set_in_file = False for keyword in keywords: if keyword.key in ("TODO", "SEQ_TODO"): - todo_kws, done_kws = re.sub(r"\(.\)", "", keyword.value).split("|", 1) + states = parse_todo_done_keywords(keyword.value) + self.todo_keywords, self.done_keywords = ( + states["not_completed"], + states["completed"], + ) + keywords_set_in_file = True - self.todo_keywords = re.sub(r"\s{2,}", " ", todo_kws.strip()).split() - self.done_keywords = re.sub(r"\s{2,}", " ", done_kws.strip()).split() + if not keywords_set_in_file and "org-todo-keywords" in 
environment: + # Read keywords from environment + states = parse_todo_done_keywords(environment["org-todo-keywords"]) + self.todo_keywords, self.done_keywords = ( + states["not_completed"], + states["completed"], + ) self.keywords: List[Property] = keywords self.contents: List[RawLine] = contents self.list_items: List[ListItem] = list_items self.structural: List = structural self.properties: List = properties + self.delimiters: List = delimiters self._path = None self.headlines: List[Headline] = list( map(lambda hl: parse_headline(hl, self, self), headlines) @@ -1857,7 +2373,7 @@ class OrgDoc: Created by org-roam v2. """ for p in self.properties: - if p.key == 'ID': + if p.key == "ID": return p.value return None @@ -1865,6 +2381,17 @@ class OrgDoc: def path(self): return self._path + @property + def tags(self) -> list[str]: + for kw in self.keywords: + if kw.key == "FILETAGS": + return kw.value.strip(":").split(":") + return [] + + @property + def shallow_tags(self) -> list[str]: + return self.tags + ## Querying def get_links(self): for headline in self.headlines: @@ -1893,7 +2420,7 @@ class OrgDoc: def getTopHeadlines(self): return self.headlines - def getAllHeadlines(self) -> Generator[Headline]: + def getAllHeadlines(self) -> Iterator[Headline]: todo = self.headlines[::-1] # We go backwards, to pop/append and go depth-first while len(todo) != 0: hl = todo.pop() @@ -1902,7 +2429,7 @@ class OrgDoc: yield hl def get_code_snippets(self): - for headline in self.headlines: + for headline in self.getAllHeadlines(): yield from headline.get_code_snippets() # Writing @@ -1913,12 +2440,12 @@ class OrgDoc: tags = ":" + ":".join(headline.shallow_tags) + ":" state = "" - if headline.state: - state = headline.state + " " + if headline._state: + state = headline._state["name"] + " " raw_title = token_list_to_raw(headline.title.contents) tags_padding = "" - if not raw_title.endswith(" ") and tags: + if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags: 
tags_padding = " " yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags @@ -1999,6 +2526,9 @@ class OrgDoc: for struct in self.structural: lines.append(dump_structural(struct)) + for content in self.delimiters: + lines.append(dump_delimiters(content)) + for kw in self.keywords: lines.append(dump_kw(kw)) @@ -2015,18 +2545,20 @@ class OrgDoc: class OrgDocReader: - def __init__(self): - self.headlines: List[Headline] = [] - self.keywords: List[Property] = [] - self.headline_hierarchy: List[OrgDoc] = [] + def __init__(self, environment=BASE_ENVIRONMENT): + self.headlines: List[HeadlineDict] = [] + self.keywords: List[Keyword] = [] + self.headline_hierarchy: List[Optional[HeadlineDict]] = [] self.contents: List[RawLine] = [] self.delimiters: List[DelimiterLine] = [] self.list_items: List[ListItem] = [] self.table_rows: List[TableRow] = [] self.structural: List = [] self.properties: List = [] + self.current_drawer: Optional[List] = None + self.environment = environment - def finalize(self): + def finalize(self) -> OrgDoc: return OrgDoc( self.headlines, self.keywords, @@ -2034,15 +2566,17 @@ class OrgDocReader: self.list_items, self.structural, self.properties, + self.delimiters, + self.environment, ) ## Construction - def add_headline(self, linenum: int, match: re.Match) -> int: + def add_headline(self, linenum: int, match: re.Match): # Position reader on the proper headline stars = match.group("stars") depth = len(stars) - headline = { + headline: HeadlineDict = { "linenum": linenum, "orig": match, "title": match.group("line"), @@ -2070,15 +2604,30 @@ class OrgDocReader: parent_idx = len(self.headline_hierarchy) - 1 while self.headline_hierarchy[parent_idx] is None: parent_idx -= 1 - self.headline_hierarchy[parent_idx]["children"].append(headline) + parent_headline = self.headline_hierarchy[parent_idx] + assert parent_headline is not None + parent_headline["children"].append(headline) self.headline_hierarchy.append(headline) if all([hl 
is not None for hl in self.headline_hierarchy]): - if not ([ len(hl['orig'].group('stars')) for hl in self.headline_hierarchy ] - == list(range(1, len(self.headline_hierarchy) + 1))): - raise AssertionError('Error on Headline Hierarchy') + if not ( + [ + len(cast(HeadlineDict, hl)["orig"].group("stars")) + for hl in self.headline_hierarchy + ] + == list(range(1, len(self.headline_hierarchy) + 1)) + ): + raise AssertionError("Error on Headline Hierarchy") + else: + # This might happen if headlines with more that 1 level deeper are found + pass - def add_list_item_line(self, linenum: int, match: re.Match) -> int: + # We can safely assert this as all the `None`s are there to + # support the addition of a `HeadlineDict` at the correct + # depth but not more + assert self.headline_hierarchy[-1] is not None + + def add_list_item_line(self, linenum: int, match: re.Match) -> ListItem: li = ListItem( linenum=linenum, match=match, @@ -2089,9 +2638,13 @@ class OrgDocReader: checkbox_indentation=match.group("checkbox_indentation"), checkbox_value=match.group("checkbox_value"), tag_indentation=match.group("tag_indentation"), - tag=parse_content_block( - [RawLine(linenum=linenum, line=match.group("tag"))] - ).contents if match.group("tag") else None, + tag=( + parse_content_block( + [RawLine(linenum=linenum, line=match.group("tag"))] + ).contents + if match.group("tag") + else None + ), content=parse_content_block( [RawLine(linenum=linenum, line=match.group("content"))] ).contents, @@ -2100,18 +2653,19 @@ class OrgDocReader: if len(self.headline_hierarchy) == 0: self.list_items.append(li) else: + assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["list_items"].append(li) return li - def add_table_line(self, linenum: int, line: str) -> int: - chunks = line.split('|') + def add_table_line(self, linenum: int, line: str): + chunks = line.split("|") indentation = len(chunks[0]) - if chunks[-1].strip() == '': + if chunks[-1].strip() == "": suffix = chunks[-1] 
cells = chunks[1:-1] last_cell_closed = True else: - suffix = '' + suffix = "" cells = chunks[1:] last_cell_closed = False @@ -2126,9 +2680,10 @@ class OrgDocReader: if len(self.headline_hierarchy) == 0: self.table_rows.append(row) else: + assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["table_rows"].append(row) - def add_keyword_line(self, linenum: int, match: re.Match) -> int: + def add_keyword_line(self, linenum: int, match: re.Match): options = match.group("options") kw = Keyword( linenum, @@ -2140,6 +2695,7 @@ class OrgDocReader: if len(self.headline_hierarchy) == 0: self.keywords.append(kw) else: + assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["keywords"].append(kw) def add_raw_line(self, linenum: int, line: str): @@ -2147,22 +2703,35 @@ class OrgDocReader: if len(self.headline_hierarchy) == 0: self.contents.append(raw) else: + assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["contents"].append(raw) def add_begin_block_line(self, linenum: int, match: re.Match): - line = DelimiterLine(linenum, match.group(0), DelimiterLineType.BEGIN_BLOCK, - BlockDelimiterTypeData(match.group("subtype")), match.group('arguments')) + line = DelimiterLine( + linenum, + match.group(0), + DelimiterLineType.BEGIN_BLOCK, + BlockDelimiterTypeData(match.group("subtype")), + match.group("arguments"), + ) if len(self.headline_hierarchy) == 0: self.delimiters.append(line) else: + assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["delimiters"].append(line) def add_end_block_line(self, linenum: int, match: re.Match): - line = DelimiterLine(linenum, match.group(0), DelimiterLineType.END_BLOCK, - BlockDelimiterTypeData(match.group("subtype")), None) + line = DelimiterLine( + linenum, + match.group(0), + DelimiterLineType.END_BLOCK, + BlockDelimiterTypeData(match.group("subtype")), + None, + ) if len(self.headline_hierarchy) == 0: self.delimiters.append(line) else: + assert 
self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["delimiters"].append(line) def add_property_drawer_line(self, linenum: int, line: str, match: re.Match): @@ -2170,14 +2739,17 @@ class OrgDocReader: self.current_drawer = self.properties self.structural.append((linenum, line)) else: + assert self.headline_hierarchy[-1] is not None self.current_drawer = self.headline_hierarchy[-1]["properties"] self.headline_hierarchy[-1]["structural"].append((linenum, line)) def add_results_drawer_line(self, linenum: int, line: str, match: re.Match): + assert self.headline_hierarchy[-1] is not None self.current_drawer = self.headline_hierarchy[-1]["results"] self.headline_hierarchy[-1]["structural"].append((linenum, line)) def add_logbook_drawer_line(self, linenum: int, line: str, match: re.Match): + assert self.headline_hierarchy[-1] is not None self.current_drawer = self.headline_hierarchy[-1]["logbook"] self.headline_hierarchy[-1]["structural"].append((linenum, line)) @@ -2186,26 +2758,24 @@ class OrgDocReader: if len(self.headline_hierarchy) == 0: self.structural.append((linenum, line)) else: + assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["structural"].append((linenum, line)) - def add_node_properties_line(self, linenum: int, match: re.Match) -> int: + def add_node_properties_line(self, linenum: int, match: re.Match): key = match.group("key") value = match.group("value").strip() if as_time := parse_time(value): value = as_time - try: - self.current_drawer.append(Property(linenum, match, key, value, None)) - except Exception: - if "current_drawer" not in dir(self): # Throw a better error on this case - raise Exception( - "Found properties before :PROPERTIES: line. Error on Org file?" - ) - else: - raise # Let the exception pass + if self.current_drawer is None: # Throw a better error on this case + raise Exception( + "Found properties before :PROPERTIES: line. Error on Org file?" 
+ ) - def read(self, s, environment): + self.current_drawer.append(Property(linenum, match, key, value, None)) + + def read(self, s): lines = s.split("\n") line_count = len(lines) reader = enumerate(lines) @@ -2219,8 +2789,8 @@ class OrgDocReader: nonlocal list_item nonlocal list_item_indentation if list_item: - if ((line[:list_item.text_start_pos].strip() == '') - or (len(line.strip()) == 0) + if (line[: list_item.text_start_pos].strip() == "") or ( + len(line.strip()) == 0 ): list_item.append_line(line) added = True @@ -2283,7 +2853,7 @@ class OrgDocReader: list_item = None elif m := NODE_PROPERTIES_RE.match(line): self.add_node_properties_line(linenum, m) - elif line.strip().startswith('|'): + elif line.strip().startswith("|"): self.add_table_line(linenum, line) list_item_indentation = None list_item = None @@ -2295,9 +2865,28 @@ class OrgDocReader: raise -def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True): - reader = OrgDocReader() - reader.read(s, environment) +def loads( + s: str, environment: Optional[Dict] = BASE_ENVIRONMENT, extra_cautious: bool = True +) -> OrgDoc: + """ + Load an Org-mode document from a string. + + Args: + s (str): The string representation of the Org-mode document. + environment (Optional[dict]): The environment for parsing. Defaults to + `BASE_ENVIRONMENT`. + extra_cautious (bool): If True, perform an extra check to ensure that + the document can be re-serialized to the original string. Defaults to True. + + Returns: + OrgDoc: The loaded Org-mode document. + + Raises: + NonReproducibleDocument: If `extra_cautious` is True and there is a + difference between the original string and the re-serialized document. 
+ """ + reader = OrgDocReader(environment) + reader.read(s) doc = reader.finalize() if extra_cautious: # Check that all options can be properly re-serialized after_dump = dumps(doc) @@ -2316,6 +2905,7 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True): context_start = i context_last_line = i elif context_start: + assert context_last_line is not None if i > (context_last_line + DEBUG_DIFF_CONTEXT): start = max(0, context_start - DEBUG_DIFF_CONTEXT) end = min(len(diff), context_last_line + DEBUG_DIFF_CONTEXT) @@ -2328,24 +2918,61 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True): context_last_line = None # print("---\n" + after_dump + "\n---") - raise NonReproducibleDocument("Difference found between existing version and dumped") + raise NonReproducibleDocument( + "Difference found between existing version and dumped" + ) return doc -def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False): +def load( + f: TextIO, + environment: Optional[dict] = BASE_ENVIRONMENT, + extra_cautious: bool = False, +) -> OrgDoc: + """ + Load an Org-mode document from a file object. + + Args: + f (TextIO): The file object containing the Org-mode document. + environment (Optional[dict]): The environment for parsing. Defaults to + `BASE_ENVIRONMENT`. + extra_cautious (bool): If True, perform an extra check to ensure that + the document can be re-serialized to the original string. Defaults to False. + + Returns: + OrgDoc: The loaded Org-mode document. + """ doc = loads(f.read(), environment, extra_cautious) doc._path = os.path.abspath(f.name) return doc -def dumps(doc): +def dumps(doc: OrgDoc) -> str: + """ + Serialize an OrgDoc object to a string. + + Args: + doc (OrgDoc): The OrgDoc object to serialize. + + Returns: + str: The serialized string representation of the OrgDoc object. 
+ """ dump = list(doc.dump()) result = "\n".join(dump) - # print(result) return result -def dump(doc, fp): +def dump(doc: OrgDoc, fp: TextIO) -> None: + """ + Serialize an OrgDoc object to a file. + + Args: + doc (OrgDoc): The OrgDoc object to serialize. + fp (TextIO): The file-like object to write the serialized data to. + + Returns: + None + """ it = doc.dump() # Write first line separately diff --git a/org_rw/py.typed b/org_rw/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/org_rw/types.py b/org_rw/types.py new file mode 100644 index 0000000..7bda704 --- /dev/null +++ b/org_rw/types.py @@ -0,0 +1,18 @@ +import re +from typing import List, TypedDict + + +class HeadlineDict(TypedDict): + linenum: int + orig: re.Match + title: str + contents: List + children: List + keywords: List + properties: List + logbook: List + structural: List + delimiters: List + results: List # TODO: Move to each specific code block? + list_items: List + table_rows: List diff --git a/org_rw/utils.py b/org_rw/utils.py index 0e6f559..87f6712 100644 --- a/org_rw/utils.py +++ b/org_rw/utils.py @@ -1,9 +1,20 @@ import uuid -from .org_rw import (Bold, Code, Headline, Italic, Line, RawLine, ListItem, Strike, Text, - Underlined, Verbatim) - -from .org_rw import dump_contents +from .org_rw import ( + Bold, + Code, + Headline, + Italic, + Line, + ListItem, + RawLine, + Strike, + TableRow, + Text, + Underlined, + Verbatim, + dump_contents, +) def get_hl_raw_contents(doc: Headline) -> str: @@ -40,6 +51,8 @@ def get_raw_contents(doc) -> str: return doc.get_raw() if isinstance(doc, ListItem): return dump_contents(doc)[1] + if isinstance(doc, TableRow): + return dump_contents(doc)[1] print("Unhandled type: " + str(doc)) raise NotImplementedError("Unhandled type: " + str(doc)) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1c51c66..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -# No external requirements at this point diff --git 
a/scripts/apply-formatting.sh b/scripts/apply-formatting.sh new file mode 100755 index 0000000..2f7486b --- /dev/null +++ b/scripts/apply-formatting.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -eu + +cd "`dirname $0`" +cd .. + +set -x + +isort --profile black . +black . diff --git a/scripts/upload-to-pip.sh b/scripts/upload-to-pip.sh index c364cbe..b5c55e4 100644 --- a/scripts/upload-to-pip.sh +++ b/scripts/upload-to-pip.sh @@ -5,6 +5,8 @@ set -eu cd "`dirname $0`" cd .. +pandoc README.org -o README.md # PyPI doesn't accept Org files + python setup.py sdist twine upload --verbose dist/* diff --git a/setup.py b/setup.py index 4ef44b3..1295538 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup setup( name="org-rw", - version="0.0.1.dev1", + version="0.0.2", description="Library to de/serialize org-files and manipulate them.", author="kenkeiras", author_email="kenkeiras@codigoparallevar.com", diff --git a/tests/03-links.org b/tests/03-links.org index ad38d7a..7ab2d75 100644 --- a/tests/03-links.org +++ b/tests/03-links.org @@ -21,3 +21,10 @@ This is a [[https://codigoparallevar.com/4][[tricky web link]​]] followed up with some text. This is [[[https://codigoparallevar.com/5][another tricky web link]]] followed up with some text. + +* Implicit links + :PROPERTIES: + :ID: 03-markup-implicit-links + :CREATED: [2020-01-01 Wed 01:01] + :END: + This is an implicit web link: https://codigoparallevar.com/implicit. 
diff --git a/tests/04-code.org b/tests/04-code.org index 956d961..7af3aed 100644 --- a/tests/04-code.org +++ b/tests/04-code.org @@ -9,6 +9,7 @@ :CREATED: [2020-01-01 Wed 01:01] :END: +#+NAME: first-code-name #+BEGIN_SRC shell :results verbatim echo "This is a test" echo "with two lines" diff --git a/tests/12-headlines-with-skip-levels.org b/tests/12-headlines-with-skip-levels.org new file mode 100644 index 0000000..17008be --- /dev/null +++ b/tests/12-headlines-with-skip-levels.org @@ -0,0 +1,22 @@ +#+TITLE: 12-Headlines with skip levels +#+DESCRIPTION: Simple org file to test Headlines with skip levels +#+TODO: TODO(t) PAUSED(p) | DONE(d) + +* Level 1 + :PROPERTIES: + :ID: 12-headlines-with-skip-levels + :CREATED: [2020-01-01 Wed 01:01] + :END: + +*** Level 3 + +*** Level 3-2 + +* Level 1-2 + +** Level 2 + +**** Level 4 + +*** Level3 + diff --git a/tests/13-tags.org b/tests/13-tags.org new file mode 100644 index 0000000..c61ccdf --- /dev/null +++ b/tests/13-tags.org @@ -0,0 +1,13 @@ +#+TITLE: 13-Tags +#+DESCRIPTION: Simple org file to test tags +#+FILETAGS: :filetag: + +* Level 1 :h1tag: + :PROPERTIES: + :ID: 13-tags + :CREATED: [2020-01-01 Wed 01:01] + :END: + +** Level2 :h2tag: +* Level 1-1 :otherh1tag: +** Level2 :otherh2tag: diff --git a/tests/test_org.py b/tests/test_org.py index 2f4200d..a1fdff1 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -1,14 +1,23 @@ -import logging import os import unittest -from datetime import date from datetime import datetime as DT -from org_rw import MarkerToken, MarkerType, Timestamp, dumps, load, loads, dom -import org_rw +from utils.assertions import ( + BOLD, + CODE, + HL, + ITALIC, + SPAN, + STRIKE, + UNDERLINED, + VERBATIM, + WEB_LINK, + Doc, + Tokens, +) -from utils.assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE, UNDERLINED, - VERBATIM, WEB_LINK, Doc, Tokens) +import org_rw +from org_rw import MarkerToken, MarkerType, Timestamp, dom, dumps, load, loads DIR = os.path.dirname(os.path.abspath(__file__)) 
@@ -202,7 +211,7 @@ class TestSerde(unittest.TestCase): doc = load(f) links = list(doc.get_links()) - self.assertEqual(len(links), 7) + self.assertEqual(len(links), 8) self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].description, "web link") @@ -224,6 +233,9 @@ class TestSerde(unittest.TestCase): self.assertEqual(links[6].value, "https://codigoparallevar.com/5") self.assertEqual(links[6].description, "another tricky web link") + self.assertEqual(links[7].value, "https://codigoparallevar.com/implicit") + self.assertEqual(links[7].description, "https://codigoparallevar.com/implicit") + ex = Doc( props=[ ("TITLE", "03-Links"), @@ -280,17 +292,35 @@ class TestSerde(unittest.TestCase): SPAN("\n"), SPAN( " This is a ", - WEB_LINK("[tricky web link]\u200b", "https://codigoparallevar.com/4"), + WEB_LINK( + "[tricky web link]\u200b", + "https://codigoparallevar.com/4", + ), " followed up with some text.\n", ), SPAN("\n"), SPAN( " This is [", - WEB_LINK("another tricky web link", "https://codigoparallevar.com/5"), + WEB_LINK( + "another tricky web link", + "https://codigoparallevar.com/5", + ), "] followed up with some text.\n", ), ], - ) + ), + HL( + "Implicit links", + props=[ + ("ID", "03-markup-implicit-links"), + ("CREATED", DT(2020, 1, 1, 1, 1)), + ], + content=[ + SPAN( + " This is an implicit web link: https://codigoparallevar.com/implicit.\n", + ), + ], + ), ), ) @@ -301,7 +331,7 @@ class TestSerde(unittest.TestCase): doc = load(f) links = list(doc.get_links()) - self.assertEqual(len(links), 7) + self.assertEqual(len(links), 8) self.assertEqual(links[0].value, "https://codigoparallevar.com/1") self.assertEqual(links[0].description, "web link") links[0].value = "https://codigoparallevar.com/1-updated" @@ -337,6 +367,9 @@ class TestSerde(unittest.TestCase): links[6].value = "https://codigoparallevar.com/5-updated" links[6].description = "another tricky web link #5 with update" + self.assertEqual(links[7].value, 
"https://codigoparallevar.com/implicit") + self.assertEqual(links[7].description, "https://codigoparallevar.com/implicit") + ex = Doc( props=[ ("TITLE", "03-Links"), @@ -416,7 +449,19 @@ class TestSerde(unittest.TestCase): "] followed up with some text.\n", ), ], - ) + ), + HL( + "Implicit links", + props=[ + ("ID", "03-markup-implicit-links"), + ("CREATED", DT(2020, 1, 1, 1, 1)), + ], + content=[ + SPAN( + " This is an implicit web link: https://codigoparallevar.com/implicit.\n", + ), + ], + ), ), ) @@ -435,18 +480,22 @@ class TestSerde(unittest.TestCase): snippets = list(doc.get_code_snippets()) self.assertEqual(len(snippets), 3) + self.assertEqual(snippets[0].name, "first-code-name") + self.assertEqual(snippets[0].language, "shell") self.assertEqual( snippets[0].content, 'echo "This is a test"\n' + 'echo "with two lines"\n' + "exit 0 # Exit successfully", ) - self.assertEqual(snippets[0].arguments.split(), ['shell', ':results', 'verbatim']) + self.assertEqual(snippets[0].arguments.split(), [":results", "verbatim"]) self.assertEqual( snippets[0].result, "This is a test\n" + "with two lines", ) + self.assertEqual(snippets[1].name, None) + self.assertEqual(snippets[1].language, "shell") self.assertEqual( snippets[1].content, 'echo "This is another test"\n' @@ -457,12 +506,14 @@ class TestSerde(unittest.TestCase): snippets[1].result, "This is another test\n" + "with two lines too" ) + self.assertEqual(snippets[2].name, None) + self.assertEqual(snippets[2].language, "c") self.assertEqual( snippets[2].content, - '/* This code has to be escaped to\n' - + ' * avoid confusion with new headlines.\n' - + ' */\n' - + 'main(){}', + "/* This code has to be escaped to\n" + + " * avoid confusion with new headlines.\n" + + " */\n" + + "main(){}", ) def test_mimic_write_file_05(self): @@ -500,7 +551,7 @@ class TestSerde(unittest.TestCase): hl_schedule_range = hl.children[1] self.assertEqual( hl_schedule_range.scheduled.time, - Timestamp(True, 2020, 12, 15, "Mar", 0, 5, '++1w') + 
Timestamp(True, 2020, 12, 15, "Mar", 0, 5, "++1w"), ) self.assertEqual( hl_schedule_range.scheduled.end_time, @@ -508,7 +559,7 @@ class TestSerde(unittest.TestCase): ) self.assertEqual( hl_schedule_range.scheduled.repetition, - '++1w', + "++1w", ) def test_update_info_file_05(self): @@ -561,7 +612,8 @@ class TestSerde(unittest.TestCase): MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE), "markup", MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), - ".", "\n" + ".", + "\n", ], ) @@ -595,12 +647,24 @@ class TestSerde(unittest.TestCase): print(lists4) self.assertEqual(len(lists4), 2) - self.assertEqual(lists4[0][0].content, ["This is a list item...", "\n that spans multiple lines", "\n"]) + self.assertEqual( + lists4[0][0].content, + ["This is a list item...", "\n that spans multiple lines", "\n"], + ) self.assertEqual(lists4[0][0].bullet, "-") - self.assertEqual(lists4[0][1].content, ["This is another list item...", "\n that has content on multiple lines", "\n"]) + self.assertEqual( + lists4[0][1].content, + [ + "This is another list item...", + "\n that has content on multiple lines", + "\n", + ], + ) self.assertEqual(lists4[0][1].bullet, "-") - self.assertEqual(lists4[1][0].content, ["This is another", "\n multiline list", "\n"]) + self.assertEqual( + lists4[1][0].content, ["This is another", "\n multiline list", "\n"] + ) self.assertEqual(lists4[1][0].bullet, "-") def test_org_roam_07(self): @@ -644,20 +708,22 @@ class TestSerde(unittest.TestCase): """.strip(), ) - def test_markup_file_09(self): with open(os.path.join(DIR, "09-markup-on-headline.org")) as f: doc = load(f) hl = doc.getTopHeadlines()[0] print(hl.title) - self.assertEqual(hl.title.contents, [ - 'Headline ', - MarkerToken(closing=False, tok_type=MarkerType.UNDERLINED_MODE), - 'with', - MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), - ' markup', - ]) + self.assertEqual( + hl.title.contents, + [ + "Headline ", + MarkerToken(closing=False, 
tok_type=MarkerType.UNDERLINED_MODE), + "with", + MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), + " markup", + ], + ) def test_mimic_write_file_10(self): with open(os.path.join(DIR, "10-tables.org")) as f: @@ -678,9 +744,9 @@ class TestSerde(unittest.TestCase): print(first_table[0]) self.assertEqual(len(first_table[0].cells), 3) - self.assertEqual(first_table[0].cells[0].strip(), 'Header1') - self.assertEqual(first_table[0].cells[1].strip(), 'Header2') - self.assertEqual(first_table[0].cells[2].strip(), 'Header3') + self.assertEqual(first_table[0].cells[0].strip(), "Header1") + self.assertEqual(first_table[0].cells[1].strip(), "Header2") + self.assertEqual(first_table[0].cells[2].strip(), "Header3") hl = hl.children[0] @@ -690,9 +756,9 @@ class TestSerde(unittest.TestCase): print(first_table[0]) self.assertEqual(len(first_table[0].cells), 3) - self.assertEqual(first_table[0].cells[0].strip(), 'Header1') - self.assertEqual(first_table[0].cells[1].strip(), 'Header2') - self.assertEqual(first_table[0].cells[2].strip(), 'Header3') + self.assertEqual(first_table[0].cells[0].strip(), "Header1") + self.assertEqual(first_table[0].cells[1].strip(), "Header2") + self.assertEqual(first_table[0].cells[2].strip(), "Header3") def test_tables_html_file_10(self): with open(os.path.join(DIR, "10-tables.org")) as f: @@ -702,27 +768,26 @@ class TestSerde(unittest.TestCase): tree = hl.as_dom() non_props = [ - item - for item in tree - if not isinstance(item, dom.PropertyDrawerNode) + item for item in tree if not isinstance(item, dom.PropertyDrawerNode) ] - self.assertTrue(isinstance(non_props[0], dom.Text) - and isinstance(non_props[1], dom.TableNode) - and isinstance(non_props[2], dom.Text), - 'Expected ') - + self.assertTrue( + isinstance(non_props[0], dom.Text) + and isinstance(non_props[1], dom.TableNode) + and isinstance(non_props[2], dom.Text), + "Expected
", + ) hl = hl.children[0] tree = hl.as_dom() non_props = [ item for item in tree - if not (isinstance(item, dom.PropertyDrawerNode) - or isinstance(item, dom.Text)) + if not ( + isinstance(item, dom.PropertyDrawerNode) or isinstance(item, dom.Text) + ) ] print_tree(non_props) - self.assertTrue(len(non_props) == 1, - 'Expected , with only (1) element') + self.assertTrue(len(non_props) == 1, "Expected , with only (1) element") def test_nested_lists_html_file_11(self): with open(os.path.join(DIR, "11-nested-lists.org")) as f: @@ -732,30 +797,232 @@ class TestSerde(unittest.TestCase): tree = hl.as_dom() non_props = [ - item - for item in tree - if not isinstance(item, dom.PropertyDrawerNode) + item for item in tree if not isinstance(item, dom.PropertyDrawerNode) ] print_tree(non_props) - self.assertTrue((len(non_props) == 1) and (isinstance(non_props[0], dom.ListGroupNode)), - 'Expected only as top level') + self.assertTrue( + (len(non_props) == 1) and (isinstance(non_props[0], dom.ListGroupNode)), + "Expected only as top level", + ) dom_list = non_props[0] children = dom_list.children - self.assertTrue(len(children) == 5, 'Expected 5 items inside , 3 texts and 2 sublists') + self.assertTrue( + len(children) == 5, "Expected 5 items inside , 3 texts and 2 sublists" + ) # Assert texts - self.assertEqual(children[0].content, ['1']) - self.assertEqual(children[2].content, ['2']) - self.assertEqual(children[4].content[0], '3') # Might be ['3', '\n'] but shouldn't be a breaking change + self.assertEqual(children[0].content, ["1"]) + self.assertEqual(children[2].content, ["2"]) + self.assertEqual( + children[4].content[0], "3" + ) # Might be ['3', '\n'] but shouldn't be a breaking change # Assert lists - self.assertTrue(isinstance(children[1], dom.ListGroupNode), 'Expected sublist inside "1"') - self.assertEqual(children[1].children[0].content, ['1.1']) - self.assertEqual(children[1].children[1].content, ['1.2']) - self.assertTrue(isinstance(children[3], dom.ListGroupNode), 
'Expected sublist inside "2"') - self.assertEqual(children[3].children[0].content, ['2.1']) - self.assertEqual(children[3].children[1].content, ['2.2']) + self.assertTrue( + isinstance(children[1], dom.ListGroupNode), 'Expected sublist inside "1"' + ) + self.assertEqual(children[1].children[0].content, ["1.1"]) + self.assertEqual(children[1].children[1].content, ["1.2"]) + self.assertTrue( + isinstance(children[3], dom.ListGroupNode), 'Expected sublist inside "2"' + ) + self.assertEqual(children[3].children[0].content, ["2.1"]) + self.assertEqual(children[3].children[1].content, ["2.2"]) + + def test_mimic_write_file_12(self): + with open(os.path.join(DIR, "12-headlines-with-skip-levels.org")) as f: + orig = f.read() + doc = loads(orig) + + self.assertEqual(dumps(doc), orig) + + def test_add_todo_keywords_programatically(self): + orig = """* NEW_TODO_STATE First entry + +* NEW_DONE_STATE Second entry""" + doc = loads( + orig, environment={"org-todo-keywords": "NEW_TODO_STATE | NEW_DONE_STATE"} + ) + self.assertEqual(doc.headlines[0].is_todo, True) + self.assertEqual(doc.headlines[0].is_done, False) + + self.assertEqual(doc.headlines[1].is_todo, False) + self.assertEqual(doc.headlines[1].is_done, True) + + self.assertEqual(dumps(doc), orig) + + def test_add_todo_keywords_in_file(self): + orig = """#+TODO: NEW_TODO_STATE | NEW_DONE_STATE + +* NEW_TODO_STATE First entry + +* NEW_DONE_STATE Second entry""" + doc = loads( + orig, environment={"org-todo-keywords": "NEW_TODO_STATE | NEW_DONE_STATE"} + ) + self.assertEqual(doc.headlines[0].is_todo, True) + self.assertEqual(doc.headlines[0].is_done, False) + + self.assertEqual(doc.headlines[1].is_todo, False) + self.assertEqual(doc.headlines[1].is_done, True) + + self.assertEqual(dumps(doc), orig) + + def test_mimic_write_file_13(self): + with open(os.path.join(DIR, "13-tags.org")) as f: + orig = f.read() + doc = loads(orig) + + self.assertEqual(dumps(doc), orig) + + def test_tag_property_read_13(self): + with 
open(os.path.join(DIR, "13-tags.org")) as f: + orig = f.read() + doc = loads(orig) + + self.assertEqual(doc.tags, ["filetag"]) + + h1_1, h1_2 = doc.getTopHeadlines() + self.assertEqual(sorted(h1_1.tags), ["filetag", "h1tag"]) + self.assertEqual(sorted(h1_2.tags), ["filetag", "otherh1tag"]) + + h1_1_h2 = h1_1.children[0] + self.assertEqual(sorted(h1_1_h2.tags), ["filetag", "h1tag", "h2tag"]) + + h1_2_h2 = h1_2.children[0] + self.assertEqual(sorted(h1_2_h2.tags), ["filetag", "otherh1tag", "otherh2tag"]) + + def test_shallow_tag_property_read_13(self): + with open(os.path.join(DIR, "13-tags.org")) as f: + orig = f.read() + doc = loads(orig) + + self.assertEqual(doc.shallow_tags, ["filetag"]) + + h1_1, h1_2 = doc.getTopHeadlines() + self.assertEqual(sorted(h1_1.shallow_tags), ["h1tag"]) + self.assertEqual(sorted(h1_2.shallow_tags), ["otherh1tag"]) + + h1_1_h2 = h1_1.children[0] + self.assertEqual(sorted(h1_1_h2.shallow_tags), ["h2tag"]) + + h1_2_h2 = h1_2.children[0] + self.assertEqual(sorted(h1_2_h2.shallow_tags), ["otherh2tag"]) + + def test_exclude_tags_from_inheritance_property_read_13(self): + with open(os.path.join(DIR, "13-tags.org")) as f: + orig = f.read() + doc = loads( + orig, + { + "org-tags-exclude-from-inheritance": ("h1tag", "otherh2tag"), + }, + ) + + self.assertEqual(doc.tags, ["filetag"]) + + h1_1, h1_2 = doc.getTopHeadlines() + self.assertEqual(sorted(h1_1.tags), ["filetag", "h1tag"]) + self.assertEqual(sorted(h1_2.tags), ["filetag", "otherh1tag"]) + + h1_1_h2 = h1_1.children[0] + self.assertEqual(sorted(h1_1_h2.tags), ["filetag", "h2tag"]) + + h1_2_h2 = h1_2.children[0] + self.assertEqual(sorted(h1_2_h2.tags), ["filetag", "otherh1tag", "otherh2tag"]) + + def test_select_tags_to_inheritance_property_read_13(self): + with open(os.path.join(DIR, "13-tags.org")) as f: + orig = f.read() + doc = loads( + orig, + { + "org-tags-exclude-from-inheritance": ("h1tag", "otherh2tag"), + "org-use-tag-inheritance": ("h1tag",), + }, + ) + + 
self.assertEqual(doc.tags, ["filetag"]) + + h1_1, h1_2 = doc.getTopHeadlines() + self.assertEqual(sorted(h1_1.tags), ["h1tag"]) + self.assertEqual(sorted(h1_2.tags), ["otherh1tag"]) + + h1_1_h2 = h1_1.children[0] + self.assertEqual(sorted(h1_1_h2.tags), ["h1tag", "h2tag"]) + + h1_2_h2 = h1_2.children[0] + self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"]) + + def test_update_headline_from_none_to_todo(self): + orig = "* First entry" + doc = loads(orig) + self.assertEqual(doc.headlines[0].is_todo, False) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state, None) + + doc.headlines[0].state = "TODO" + self.assertEqual(doc.headlines[0].is_todo, True) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state["name"], "TODO") + + self.assertEqual(dumps(doc), "* TODO First entry") + + def test_update_headline_from_none_to_done(self): + orig = "* First entry" + doc = loads(orig) + self.assertEqual(doc.headlines[0].is_todo, False) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state, None) + + doc.headlines[0].state = org_rw.HeadlineState(name="DONE") + self.assertEqual(doc.headlines[0].is_todo, False) + self.assertEqual(doc.headlines[0].is_done, True) + self.assertEqual(doc.headlines[0].state["name"], "DONE") + + self.assertEqual(dumps(doc), "* DONE First entry") + + def test_update_headline_from_todo_to_none(self): + orig = "* TODO First entry" + doc = loads(orig) + self.assertEqual(doc.headlines[0].is_todo, True) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state["name"], "TODO") + + doc.headlines[0].state = None + self.assertEqual(doc.headlines[0].is_todo, False) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state, None) + + self.assertEqual(dumps(doc), "* First entry") + + def test_update_headline_from_todo_to_done(self): + orig = "* TODO First entry" + doc = 
loads(orig) + self.assertEqual(doc.headlines[0].is_todo, True) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state["name"], "TODO") + + doc.headlines[0].state = "DONE" + self.assertEqual(doc.headlines[0].is_todo, False) + self.assertEqual(doc.headlines[0].is_done, True) + self.assertEqual(doc.headlines[0].state["name"], "DONE") + self.assertEqual(dumps(doc), "* DONE First entry") + + def test_update_headline_from_done_to_todo(self): + orig = "* DONE First entry" + doc = loads(orig) + self.assertEqual(doc.headlines[0].is_todo, False) + self.assertEqual(doc.headlines[0].is_done, True) + self.assertEqual(doc.headlines[0].state["name"], "DONE") + + doc.headlines[0].state = org_rw.HeadlineState(name="TODO") + self.assertEqual(doc.headlines[0].is_todo, True) + self.assertEqual(doc.headlines[0].is_done, False) + self.assertEqual(doc.headlines[0].state["name"], "TODO") + + self.assertEqual(dumps(doc), "* TODO First entry") def print_tree(tree, indentation=0, headline=None): @@ -775,6 +1042,10 @@ def print_element(element, indentation, headline): if isinstance(element, org_rw.Link): print(" " * indentation * 2, "Link:", element.get_raw()) elif isinstance(element, str): - print(" " * indentation * 2, "Str[" + element.replace('\n', '') + "]", type(element)) + print( + " " * indentation * 2, + "Str[" + element.replace("\n", "") + "]", + type(element), + ) else: print_tree(element, indentation, headline) diff --git a/tests/test_timestamp.py b/tests/test_timestamp.py new file mode 100644 index 0000000..f7e0eca --- /dev/null +++ b/tests/test_timestamp.py @@ -0,0 +1,86 @@ +"""Test the Timestamp object.""" + +from datetime import date, datetime + +import pytest + +from org_rw import Timestamp + + +def test_init_with_datetime() -> None: + datetime_obj: datetime = datetime(2024, 7, 20, 15, 45) + + ts: Timestamp = Timestamp(active=True, datetime_=datetime_obj) + + assert ts.active is True + assert ts._year == 2024 + assert ts._month == 7 + 
assert ts._day == 20 + assert ts.hour == 15 + assert ts.minute == 45 + assert ts.dow is None + assert ts.repetition is None + + +def test_init_with_date() -> None: + date_obj: date = date(2024, 7, 20) + + ts: Timestamp = Timestamp(active=True, datetime_=date_obj) + + assert ts.active is True + assert ts._year == 2024 + assert ts._month == 7 + assert ts._day == 20 + assert ts.hour is None + assert ts.minute is None + assert ts.dow is None + assert ts.repetition is None + + +def test_init_with_year_month_day() -> None: + ts: Timestamp = Timestamp( + active=True, + year=2024, + month=7, + day=20, + hour=15, + minute=45, + dow="Saturday", + repetition=".+1d", + ) + + assert ts.active is True + assert ts._year == 2024 + assert ts._month == 7 + assert ts._day == 20 + assert ts.hour == 15 + assert ts.minute == 45 + assert ts.dow == "Saturday" + assert ts.repetition == ".+1d" + + +def test_init_without_required_arguments() -> None: + with pytest.raises(ValueError): + Timestamp(active=True) + + +def test_init_with_partial_date_info() -> None: + with pytest.raises(ValueError): + Timestamp(active=True, year=2024, month=7) + + +def test_init_with_datetime_overrides_date_info() -> None: + datetime_obj: datetime = datetime(2024, 7, 20, 15, 45) + + ts: Timestamp = Timestamp( + active=True, year=2020, month=1, day=1, datetime_=datetime_obj + ) + + assert ts.active is True + assert ts._year == 2024 + assert ts._month == 7 + assert ts._day == 20 + assert ts.hour == 15 + assert ts.minute == 45 + assert ts.dow is None + assert ts.repetition is None diff --git a/tests/utils/assertions.py b/tests/utils/assertions.py index 59dc658..9012d49 100644 --- a/tests/utils/assertions.py +++ b/tests/utils/assertions.py @@ -2,8 +2,17 @@ import collections import unittest from datetime import datetime -from org_rw import (Bold, Code, Italic, Line, Strike, Text, Underlined, - Verbatim, get_raw_contents) +from org_rw import ( + Bold, + Code, + Italic, + Line, + Strike, + Text, + Underlined, + 
Verbatim, + get_raw_contents, +) def timestamp_to_datetime(ts):