From 5ed34df57a597eb2a334bf9d4d4d886dbad0a613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 15 Oct 2023 16:34:19 +0200 Subject: [PATCH 1/7] Make typed functions pass `mypy` check. --- org_rw/dom.py | 14 ++++++ org_rw/org_rw.py | 124 ++++++++++++++++++++++++++++------------------- org_rw/types.py | 17 +++++++ 3 files changed, 104 insertions(+), 51 deletions(-) create mode 100644 org_rw/types.py diff --git a/org_rw/dom.py b/org_rw/dom.py index cb3d8fd..36493f6 100644 --- a/org_rw/dom.py +++ b/org_rw/dom.py @@ -1,3 +1,6 @@ +from typing import Union + + class DrawerNode: def __init__(self): self.children = [] @@ -102,4 +105,15 @@ class CodeBlock(BlockNode): def __repr__(self): return "".format(len(self.lines)) +DomNode = Union[DrawerNode, + PropertyNode, + ListGroupNode, + TableNode, + TableSeparatorRow, + TableRow, + Text, + ListItem, + BlockNode, + ] + from .utils import get_raw_contents diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 9a60199..b42e889 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -1,6 +1,7 @@ from __future__ import annotations import collections +from ctypes import ArgumentError import difflib import logging import os @@ -8,7 +9,9 @@ import re import sys from datetime import date, datetime, timedelta from enum import Enum -from typing import Generator, List, Optional, Tuple, Union +from typing import cast, Iterator, List, Optional, Tuple, Union + +from .types import HeadlineDict from . import dom @@ -154,12 +157,12 @@ class RangeInRaw: contents.insert(start_idx + i + 1, element) -def unescape_block_lines(lines: str) -> str: +def unescape_block_lines(block: str) -> str: """ Remove leading ',' from block_lines if they escape `*` characters. """ i = 0 - lines = lines.split('\n') + lines = block.split('\n') while i < len(lines): line = lines[i] if (line.lstrip(' ').startswith(',') @@ -177,8 +180,8 @@ def unescape_block_lines(lines: str) -> str: def get_links_from_content(content): in_link = False in_description = False - link_value = [] - link_description = [] + link_value: List[str] = [] + link_description: List[str] = [] for i, tok in enumerate(get_tokens(content)): if isinstance(tok, LinkToken): @@ -210,8 +213,8 @@ def text_to_dom(tokens, item): in_link = False in_description = False - link_value = [] - link_description = [] + link_value: List[str] = [] + link_description: List[str] = [] contents = [] @@ -361,9 +364,10 @@ class Headline: + self.delimiters ) - tree = [] - current_node = None - indentation_tree = [] + tree: List[dom.DomNode] = [] + current_node: Optional[dom.DomNode] = None + indentation_tree: List[dom.DomNode] = [] + contents: Optional[str] = None for line in sorted(everything, key=get_line): if isinstance(current_node, dom.CodeBlock): @@ -404,7 +408,7 @@ class Headline: ): node.append(dom.Text(line)) current_node = node - contents = [] + contents = None break elif ((not isinstance(node, dom.TableNode)) and (type(node) not in NON_FINISHED_GROUPS) @@ -419,7 +423,7 @@ class Headline: tree_up.pop(-1) else: current_node = None - contents = [] + contents = None tree.append(dom.Text(text_to_dom(line.contents, line))) indentation_tree = tree_up @@ -669,7 +673,9 @@ class Headline: parsed = as_time_range else: parsed = OrgTime.parse(time_seg) - times.append(parsed) + + if parsed is not None: + times.append(parsed) return times @@ -1130,6 +1136,9 @@ def parse_time(value: str) -> Union[None, TimeRange, OrgTime]: # @TODO properly consider "=> DURATION" section start, end = value.split("=")[0].split("--") as_time_range = parse_org_time_range(start, end) + if as_time_range is None: + return None + if (as_time_range.start_time is not None) and ( as_time_range.end_time is not None ): @@ -1142,8 +1151,13 @@ def parse_time(value: str) -> Union[None, TimeRange, OrgTime]: return None -def parse_org_time_range(start, end) -> TimeRange: - return TimeRange(OrgTime.parse(start), OrgTime.parse(end)) +def parse_org_time_range(start, end) -> Optional[TimeRange]: + start_time = OrgTime.parse(start) + end_time = OrgTime.parse(end) + + if start_time is None or end_time is None: + return None + return TimeRange(start_time, end_time) class OrgTime: @@ -1170,12 +1184,13 @@ class OrgTime: return f"OrgTime({self.to_raw()})" @classmethod - def parse(self, value: str) -> OrgTime: + def parse(self, value: str) -> Optional[OrgTime]: if m := ACTIVE_TIME_STAMP_RE.match(value): active = True elif m := INACTIVE_TIME_STAMP_RE.match(value): active = False else: + # raise ArgumentError("Cannot parse `{}` as OrgTime".format(value)) return None repetition = None @@ -1219,7 +1234,7 @@ class OrgTime: ) -def time_from_str(s: str) -> OrgTime: +def time_from_str(s: str) -> Optional[OrgTime]: return OrgTime.parse(s) @@ -1280,7 +1295,7 @@ class Line: class Link: - def __init__(self, value: str, description: str, origin: RangeInRaw): + def __init__(self, value: str, description: Optional[str], origin: RangeInRaw): self._value = value self._description = description self._origin = origin @@ -1452,7 +1467,7 @@ class Verbatim: return f"{self.Marker}{raw}{self.Marker}" -def is_pre(char: str) -> bool: +def is_pre(char: Optional[str]) -> bool: if isinstance(char, str): return char in "\n\r\t -({'\"" else: @@ -1499,7 +1514,7 @@ def tokenize_contents(contents: str): tokens = [] last_char = None - text = [] + text: List[str] = [] closes = set() in_link = False in_link_description = False @@ -1619,7 +1634,7 @@ def parse_contents(raw_contents: List[RawLine]): return [] blocks = [] - current_block = [] + current_block: List[RawLine] = [] for line in raw_contents: if len(current_block) == 0: @@ -1627,6 +1642,7 @@ def parse_contents(raw_contents: List[RawLine]): current_line = line.linenum current_block.append(line) else: + current_line = cast(int, current_line) if line.linenum == current_line + 1: # Continue with the current block current_line = line.linenum @@ -1652,8 +1668,8 @@ def parse_content_block(raw_contents: Union[List[RawLine],str]): for line in raw_contents: contents_buff.append(line.line) - contents = "\n".join(contents_buff) - tokens = tokenize_contents(contents) + contents_buff_text = "\n".join(contents_buff) + tokens = tokenize_contents(contents_buff_text) if isinstance(raw_contents, str): current_line = None else: @@ -1893,7 +1909,7 @@ class OrgDoc: def getTopHeadlines(self): return self.headlines - def getAllHeadlines(self) -> Generator[Headline]: + def getAllHeadlines(self) -> Iterator[Headline]: todo = self.headlines[::-1] # We go backwards, to pop/append and go depth-first while len(todo) != 0: hl = todo.pop() @@ -2016,15 +2032,16 @@ class OrgDoc: class OrgDocReader: def __init__(self): - self.headlines: List[Headline] = [] - self.keywords: List[Property] = [] - self.headline_hierarchy: List[OrgDoc] = [] + self.headlines: List[HeadlineDict] = [] + self.keywords: List[Keyword] = [] + self.headline_hierarchy: List[HeadlineDict] = [] self.contents: List[RawLine] = [] self.delimiters: List[DelimiterLine] = [] self.list_items: List[ListItem] = [] self.table_rows: List[TableRow] = [] self.structural: List = [] self.properties: List = [] + self.current_drawer: Optional[List] = None def finalize(self): return OrgDoc( @@ -2037,12 +2054,12 @@ class OrgDocReader: ) ## Construction - def add_headline(self, linenum: int, match: re.Match) -> int: + def add_headline(self, linenum: int, match: re.Match): # Position reader on the proper headline stars = match.group("stars") depth = len(stars) - headline = { + headline: HeadlineDict = { "linenum": linenum, "orig": match, "title": match.group("line"), @@ -2058,27 +2075,35 @@ class OrgDocReader: "table_rows": [], } - while (depth - 1) > len(self.headline_hierarchy): + headline_hierarchy: List[Optional[HeadlineDict]] = list(self.headline_hierarchy) + + while (depth - 1) > len(headline_hierarchy): # Introduce structural headlines - self.headline_hierarchy.append(None) - while depth <= len(self.headline_hierarchy): - self.headline_hierarchy.pop() + headline_hierarchy.append(None) + while depth <= len(headline_hierarchy): + headline_hierarchy.pop() if depth == 1: self.headlines.append(headline) else: - parent_idx = len(self.headline_hierarchy) - 1 - while self.headline_hierarchy[parent_idx] is None: + parent_idx = len(headline_hierarchy) - 1 + while headline_hierarchy[parent_idx] is None: parent_idx -= 1 - self.headline_hierarchy[parent_idx]["children"].append(headline) - self.headline_hierarchy.append(headline) + parent_headline = headline_hierarchy[parent_idx] + assert parent_headline is not None + parent_headline["children"].append(headline) + headline_hierarchy.append(headline) - if all([hl is not None for hl in self.headline_hierarchy]): + if all([hl is not None for hl in headline_hierarchy]): if not ([ len(hl['orig'].group('stars')) for hl in self.headline_hierarchy ] == list(range(1, len(self.headline_hierarchy) + 1))): raise AssertionError('Error on Headline Hierarchy') + else: + raise AssertionError('None found on headline hierarchy') - def add_list_item_line(self, linenum: int, match: re.Match) -> int: + self.headline_hierarchy = cast(List[HeadlineDict], headline_hierarchy) + + def add_list_item_line(self, linenum: int, match: re.Match) -> ListItem: li = ListItem( linenum=linenum, match=match, @@ -2103,7 +2128,7 @@ class OrgDocReader: self.headline_hierarchy[-1]["list_items"].append(li) return li - def add_table_line(self, linenum: int, line: str) -> int: + def add_table_line(self, linenum: int, line: str): chunks = line.split('|') indentation = len(chunks[0]) if chunks[-1].strip() == '': @@ -2128,7 +2153,7 @@ class OrgDocReader: else: self.headline_hierarchy[-1]["table_rows"].append(row) - def add_keyword_line(self, linenum: int, match: re.Match) -> int: + def add_keyword_line(self, linenum: int, match: re.Match): options = match.group("options") kw = Keyword( linenum, @@ -2188,22 +2213,19 @@ class OrgDocReader: else: self.headline_hierarchy[-1]["structural"].append((linenum, line)) - def add_node_properties_line(self, linenum: int, match: re.Match) -> int: + def add_node_properties_line(self, linenum: int, match: re.Match): key = match.group("key") value = match.group("value").strip() if as_time := parse_time(value): value = as_time - try: - self.current_drawer.append(Property(linenum, match, key, value, None)) - except Exception: - if "current_drawer" not in dir(self): # Throw a better error on this case - raise Exception( - "Found properties before :PROPERTIES: line. Error on Org file?" - ) - else: - raise # Let the exception pass + if self.current_drawer is None: # Throw a better error on this case + raise Exception( + "Found properties before :PROPERTIES: line. Error on Org file?" + ) + + self.current_drawer.append(Property(linenum, match, key, value, None)) def read(self, s, environment): lines = s.split("\n") diff --git a/org_rw/types.py b/org_rw/types.py new file mode 100644 index 0000000..eff7f59 --- /dev/null +++ b/org_rw/types.py @@ -0,0 +1,17 @@ +import re +from typing import List, TypedDict + +class HeadlineDict(TypedDict): + linenum: int + orig: re.Match + title: str + contents: List + children: List + keywords: List + properties: List + logbook: List + structural: List + delimiters: List + results: List # TODO: Move to each specific code block? + list_items: List + table_rows: List From 343d864559ae439711b8e55c5288262503da6c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 15 Oct 2023 16:38:00 +0200 Subject: [PATCH 2/7] Fix handling of headline title-tags separated by tabs. --- org_rw/org_rw.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index b42e889..318c4df 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -1934,7 +1934,7 @@ class OrgDoc: raw_title = token_list_to_raw(headline.title.contents) tags_padding = "" - if not raw_title.endswith(" ") and tags: + if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags: tags_padding = " " yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags From f7ddddb8c950007137878a400c8d04bde6102a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 15 Oct 2023 17:54:59 +0200 Subject: [PATCH 3/7] Add PyTest Gitea action. --- .gitea/workflows/pytest.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .gitea/workflows/pytest.yaml diff --git a/.gitea/workflows/pytest.yaml b/.gitea/workflows/pytest.yaml new file mode 100644 index 0000000..8839413 --- /dev/null +++ b/.gitea/workflows/pytest.yaml @@ -0,0 +1,14 @@ +name: Pytest +# run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀 +on: [push] + +jobs: + pytest: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install -e . + - run: pip install pytest + - run: pytest From da1288a6ba5535d31acd904d0622a605c09ba1d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 15 Oct 2023 23:34:29 +0200 Subject: [PATCH 4/7] Add MyPy Gitea action. --- .gitea/workflows/mypy.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .gitea/workflows/mypy.yaml diff --git a/.gitea/workflows/mypy.yaml b/.gitea/workflows/mypy.yaml new file mode 100644 index 0000000..3c1604d --- /dev/null +++ b/.gitea/workflows/mypy.yaml @@ -0,0 +1,14 @@ +name: Mypy +# run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀 +on: [push] + +jobs: + mypy: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install -e . + - run: pip install mypy + - run: mypy org_rw From 61246da52170fbbb3ec95b8519189c265ce7fad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 15 Oct 2023 23:37:45 +0200 Subject: [PATCH 5/7] Merge MyPy and Pytest Gitea actions. --- .gitea/workflows/mypy.yaml | 14 -------------- .gitea/workflows/pytest.yaml | 12 +++++++++++- 2 files changed, 11 insertions(+), 15 deletions(-) delete mode 100644 .gitea/workflows/mypy.yaml diff --git a/.gitea/workflows/mypy.yaml b/.gitea/workflows/mypy.yaml deleted file mode 100644 index 3c1604d..0000000 --- a/.gitea/workflows/mypy.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: Mypy -# run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀 -on: [push] - -jobs: - mypy: - runs-on: ubuntu-latest - steps: - - name: Check out repository code - uses: actions/checkout@v3 - - run: apt-get update && apt-get install -y python3-pip - - run: pip install -e . - - run: pip install mypy - - run: mypy org_rw diff --git a/.gitea/workflows/pytest.yaml b/.gitea/workflows/pytest.yaml index 8839413..fe4b961 100644 --- a/.gitea/workflows/pytest.yaml +++ b/.gitea/workflows/pytest.yaml @@ -1,4 +1,4 @@ -name: Pytest +name: Testing # run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀 on: [push] @@ -12,3 +12,13 @@ jobs: - run: pip install -e . - run: pip install pytest - run: pytest + + mypy: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y python3-pip + - run: pip install -e . + - run: pip install mypy + - run: mypy org_rw From 9fb4bce5ef40d379cb2e98922e7ad7f68a021bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Sun, 15 Oct 2023 23:41:20 +0200 Subject: [PATCH 6/7] Add extra-test validation. --- .gitea/workflows/{pytest.yaml => tests.yaml} | 9 +++++++++ 1 file changed, 9 insertions(+) rename .gitea/workflows/{pytest.yaml => tests.yaml} (69%) diff --git a/.gitea/workflows/pytest.yaml b/.gitea/workflows/tests.yaml similarity index 69% rename from .gitea/workflows/pytest.yaml rename to .gitea/workflows/tests.yaml index fe4b961..2246ad6 100644 --- a/.gitea/workflows/pytest.yaml +++ b/.gitea/workflows/tests.yaml @@ -22,3 +22,12 @@ jobs: - run: pip install -e . - run: pip install mypy - run: mypy org_rw + + stability-extra-test: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: apt-get update && apt-get install -y git-core python3-pip + - run: pip install -e . + - run: bash extra-tests/check_all.sh From 1d0b4cce14c27ad30937c7aeadcf7e8ae5080670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Mon, 16 Oct 2023 00:21:30 +0200 Subject: [PATCH 7/7] Complete typing with `mypy --check-untyped-defs`. --- .gitea/workflows/tests.yaml | 2 +- org_rw/dom.py | 12 +++++++++--- org_rw/org_rw.py | 26 ++++++++++++++++++-------- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/.gitea/workflows/tests.yaml b/.gitea/workflows/tests.yaml index 2246ad6..ee49a29 100644 --- a/.gitea/workflows/tests.yaml +++ b/.gitea/workflows/tests.yaml @@ -21,7 +21,7 @@ jobs: - run: apt-get update && apt-get install -y python3-pip - run: pip install -e . - run: pip install mypy - - run: mypy org_rw + - run: mypy org_rw --check-untyped-defs stability-extra-test: runs-on: ubuntu-latest diff --git a/org_rw/dom.py b/org_rw/dom.py index 36493f6..cd8d63b 100644 --- a/org_rw/dom.py +++ b/org_rw/dom.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import List, Optional, Union class DrawerNode: @@ -95,7 +95,7 @@ class CodeBlock(BlockNode): def __init__(self, header, subtype, arguments): super().__init__() self.header = header - self.lines = None + self.lines: Optional[List] = None self.subtype = subtype self.arguments = arguments @@ -103,7 +103,7 @@ class CodeBlock(BlockNode): self.lines = lines def __repr__(self): - return "".format(len(self.lines)) + return "".format(len(self.lines or [])) DomNode = Union[DrawerNode, PropertyNode, @@ -116,4 +116,10 @@ DomNode = Union[DrawerNode, BlockNode, ] +ContainerDomNode = Union[DrawerNode, + ListGroupNode, + TableNode, + BlockNode, + ] + from .utils import get_raw_contents diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index 318c4df..5bb205e 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -9,7 +9,7 @@ import re import sys from datetime import date, datetime, timedelta from enum import Enum -from typing import cast, Iterator, List, Optional, Tuple, Union +from typing import cast, Iterator, List, Literal, Optional, Tuple, Union from .types import HeadlineDict @@ -366,7 +366,7 @@ class Headline: tree: List[dom.DomNode] = [] current_node: Optional[dom.DomNode] = None - indentation_tree: List[dom.DomNode] = [] + indentation_tree: List[dom.ContainerDomNode] = [] contents: Optional[str] = None for line in sorted(everything, key=get_line): @@ -402,7 +402,7 @@ class Headline: elif isinstance(line, Text): tree_up = list(indentation_tree) while len(tree_up) > 0: - node = tree_up[-1] + node: dom.DomNode = tree_up[-1] if (isinstance(node, dom.BlockNode) or isinstance(node, dom.DrawerNode) ): @@ -508,6 +508,7 @@ class Headline: node = dom.TableSeparatorRow(orig=line) else: node = dom.TableRow(line.cells, orig=line) + current_node = cast(dom.ContainerDomNode, current_node) current_node.append(node) elif ( @@ -607,7 +608,7 @@ class Headline: return self.get_lists() def get_tables(self): - tables = [] + tables: List[List] = [] # TableRow[][] last_line = None for row in self.table_rows: @@ -666,6 +667,7 @@ class Headline: time_seg = content[len("CLOCK:") :].strip() + parsed: Union[None, OrgTime, TimeRange] = None if "--" in time_seg: # TODO: Consider duration start, end = time_seg.split("=")[0].split("--") @@ -1307,7 +1309,7 @@ class Link: return "[[{}]]".format(self.value) def _update_content(self): - new_contents = [] + new_contents: List[Union[str, LinkToken]] = [] new_contents.append(self._value) if self._description: new_contents.append(LinkToken(LinkTokenType.OPEN_DESCRIPTION)) @@ -1509,9 +1511,13 @@ TOKEN_TYPE_OPEN_LINK = 3 TOKEN_TYPE_CLOSE_LINK = 4 TOKEN_TYPE_OPEN_DESCRIPTION = 5 +TokenItems = Union[ + Tuple[int, Union[None, str, MarkerToken]], +] -def tokenize_contents(contents: str): - tokens = [] + +def tokenize_contents(contents: str) -> List[TokenItems]: + tokens: List[TokenItems] = [] last_char = None text: List[str] = [] @@ -1675,14 +1681,17 @@ def parse_content_block(raw_contents: Union[List[RawLine],str]): else: current_line = raw_contents[0].linenum - contents = [] + contents: List[Union[str, MarkerToken, LinkToken]] = [] # Use tokens to tag chunks of text with it's container type for (tok_type, tok_val) in tokens: if tok_type == TOKEN_TYPE_TEXT: + assert isinstance(tok_val, str) contents.append(tok_val) elif tok_type == TOKEN_TYPE_OPEN_MARKER: + assert isinstance(tok_val, str) contents.append(MarkerToken(False, MARKERS[tok_val])) elif tok_type == TOKEN_TYPE_CLOSE_MARKER: + assert isinstance(tok_val, str) contents.append(MarkerToken(True, MARKERS[tok_val])) elif tok_type == TOKEN_TYPE_OPEN_LINK: contents.append(LinkToken(LinkTokenType.OPEN_LINK)) @@ -2338,6 +2347,7 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True): context_start = i context_last_line = i elif context_start: + assert context_last_line is not None if i > (context_last_line + DEBUG_DIFF_CONTEXT): start = max(0, context_start - DEBUG_DIFF_CONTEXT) end = min(len(diff), context_last_line + DEBUG_DIFF_CONTEXT)