Merge branch 'dev/add-types' into develop

This commit is contained in:
Sergio Martínez Portela 2023-10-16 00:24:51 +02:00
commit 1f54307fdb
4 changed files with 162 additions and 60 deletions

View File

@ -0,0 +1,33 @@
# CI workflow with three independent jobs. Each job checks out the repository,
# installs pip through apt, installs this package in editable mode and then
# runs a single kind of check.
name: Testing
# run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀
# Triggered by every push.
on: [push]
jobs:
  # Runs the test suite with pytest.
  pytest:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      # NOTE(review): apt-get is invoked without sudo, so these steps presumably
      # run as root inside the runner image -- confirm for the runner in use.
      - run: apt-get update && apt-get install -y python3-pip
      - run: pip install -e .
      - run: pip install pytest
      - run: pytest
  # Static type check of the org_rw package; --check-untyped-defs makes mypy
  # also check the bodies of functions that have no annotations.
  mypy:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      - run: apt-get update && apt-get install -y python3-pip
      - run: pip install -e .
      - run: pip install mypy
      - run: mypy org_rw --check-untyped-defs
  # Runs the repository's extra-tests/check_all.sh script. This job also
  # installs git-core, presumably because the script needs git -- verify
  # against the script.
  stability-extra-test:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      - run: apt-get update && apt-get install -y git-core python3-pip
      - run: pip install -e .
      - run: bash extra-tests/check_all.sh

View File

@ -1,3 +1,6 @@
from typing import List, Optional, Union
class DrawerNode: class DrawerNode:
def __init__(self): def __init__(self):
self.children = [] self.children = []
@ -92,7 +95,7 @@ class CodeBlock(BlockNode):
def __init__(self, header, subtype, arguments): def __init__(self, header, subtype, arguments):
super().__init__() super().__init__()
self.header = header self.header = header
self.lines = None self.lines: Optional[List] = None
self.subtype = subtype self.subtype = subtype
self.arguments = arguments self.arguments = arguments
@ -100,6 +103,23 @@ class CodeBlock(BlockNode):
self.lines = lines self.lines = lines
def __repr__(self): def __repr__(self):
return "<Code: {}>".format(len(self.lines)) return "<Code: {}>".format(len(self.lines or []))
DomNode = Union[DrawerNode,
PropertyNode,
ListGroupNode,
TableNode,
TableSeparatorRow,
TableRow,
Text,
ListItem,
BlockNode,
]
ContainerDomNode = Union[DrawerNode,
ListGroupNode,
TableNode,
BlockNode,
]
from .utils import get_raw_contents from .utils import get_raw_contents

View File

@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import collections import collections
from ctypes import ArgumentError
import difflib import difflib
import logging import logging
import os import os
@ -8,7 +9,9 @@ import re
import sys import sys
from datetime import date, datetime, timedelta from datetime import date, datetime, timedelta
from enum import Enum from enum import Enum
from typing import Generator, List, Optional, Tuple, Union from typing import cast, Iterator, List, Literal, Optional, Tuple, Union
from .types import HeadlineDict
from . import dom from . import dom
@ -154,12 +157,12 @@ class RangeInRaw:
contents.insert(start_idx + i + 1, element) contents.insert(start_idx + i + 1, element)
def unescape_block_lines(lines: str) -> str: def unescape_block_lines(block: str) -> str:
""" """
Remove leading ',' from block_lines if they escape `*` characters. Remove leading ',' from block_lines if they escape `*` characters.
""" """
i = 0 i = 0
lines = lines.split('\n') lines = block.split('\n')
while i < len(lines): while i < len(lines):
line = lines[i] line = lines[i]
if (line.lstrip(' ').startswith(',') if (line.lstrip(' ').startswith(',')
@ -177,8 +180,8 @@ def unescape_block_lines(lines: str) -> str:
def get_links_from_content(content): def get_links_from_content(content):
in_link = False in_link = False
in_description = False in_description = False
link_value = [] link_value: List[str] = []
link_description = [] link_description: List[str] = []
for i, tok in enumerate(get_tokens(content)): for i, tok in enumerate(get_tokens(content)):
if isinstance(tok, LinkToken): if isinstance(tok, LinkToken):
@ -210,8 +213,8 @@ def text_to_dom(tokens, item):
in_link = False in_link = False
in_description = False in_description = False
link_value = [] link_value: List[str] = []
link_description = [] link_description: List[str] = []
contents = [] contents = []
@ -361,9 +364,10 @@ class Headline:
+ self.delimiters + self.delimiters
) )
tree = [] tree: List[dom.DomNode] = []
current_node = None current_node: Optional[dom.DomNode] = None
indentation_tree = [] indentation_tree: List[dom.ContainerDomNode] = []
contents: Optional[str] = None
for line in sorted(everything, key=get_line): for line in sorted(everything, key=get_line):
if isinstance(current_node, dom.CodeBlock): if isinstance(current_node, dom.CodeBlock):
@ -398,13 +402,13 @@ class Headline:
elif isinstance(line, Text): elif isinstance(line, Text):
tree_up = list(indentation_tree) tree_up = list(indentation_tree)
while len(tree_up) > 0: while len(tree_up) > 0:
node = tree_up[-1] node: dom.DomNode = tree_up[-1]
if (isinstance(node, dom.BlockNode) if (isinstance(node, dom.BlockNode)
or isinstance(node, dom.DrawerNode) or isinstance(node, dom.DrawerNode)
): ):
node.append(dom.Text(line)) node.append(dom.Text(line))
current_node = node current_node = node
contents = [] contents = None
break break
elif ((not isinstance(node, dom.TableNode)) and elif ((not isinstance(node, dom.TableNode)) and
(type(node) not in NON_FINISHED_GROUPS) (type(node) not in NON_FINISHED_GROUPS)
@ -419,7 +423,7 @@ class Headline:
tree_up.pop(-1) tree_up.pop(-1)
else: else:
current_node = None current_node = None
contents = [] contents = None
tree.append(dom.Text(text_to_dom(line.contents, line))) tree.append(dom.Text(text_to_dom(line.contents, line)))
indentation_tree = tree_up indentation_tree = tree_up
@ -504,6 +508,7 @@ class Headline:
node = dom.TableSeparatorRow(orig=line) node = dom.TableSeparatorRow(orig=line)
else: else:
node = dom.TableRow(line.cells, orig=line) node = dom.TableRow(line.cells, orig=line)
current_node = cast(dom.ContainerDomNode, current_node)
current_node.append(node) current_node.append(node)
elif ( elif (
@ -603,7 +608,7 @@ class Headline:
return self.get_lists() return self.get_lists()
def get_tables(self): def get_tables(self):
tables = [] tables: List[List] = [] # TableRow[][]
last_line = None last_line = None
for row in self.table_rows: for row in self.table_rows:
@ -662,6 +667,7 @@ class Headline:
time_seg = content[len("CLOCK:") :].strip() time_seg = content[len("CLOCK:") :].strip()
parsed: Union[None, OrgTime, TimeRange] = None
if "--" in time_seg: if "--" in time_seg:
# TODO: Consider duration # TODO: Consider duration
start, end = time_seg.split("=")[0].split("--") start, end = time_seg.split("=")[0].split("--")
@ -669,6 +675,8 @@ class Headline:
parsed = as_time_range parsed = as_time_range
else: else:
parsed = OrgTime.parse(time_seg) parsed = OrgTime.parse(time_seg)
if parsed is not None:
times.append(parsed) times.append(parsed)
return times return times
@ -1130,6 +1138,9 @@ def parse_time(value: str) -> Union[None, TimeRange, OrgTime]:
# @TODO properly consider "=> DURATION" section # @TODO properly consider "=> DURATION" section
start, end = value.split("=")[0].split("--") start, end = value.split("=")[0].split("--")
as_time_range = parse_org_time_range(start, end) as_time_range = parse_org_time_range(start, end)
if as_time_range is None:
return None
if (as_time_range.start_time is not None) and ( if (as_time_range.start_time is not None) and (
as_time_range.end_time is not None as_time_range.end_time is not None
): ):
@ -1142,8 +1153,13 @@ def parse_time(value: str) -> Union[None, TimeRange, OrgTime]:
return None return None
def parse_org_time_range(start, end) -> TimeRange: def parse_org_time_range(start, end) -> Optional[TimeRange]:
return TimeRange(OrgTime.parse(start), OrgTime.parse(end)) start_time = OrgTime.parse(start)
end_time = OrgTime.parse(end)
if start_time is None or end_time is None:
return None
return TimeRange(start_time, end_time)
class OrgTime: class OrgTime:
@ -1170,12 +1186,13 @@ class OrgTime:
return f"OrgTime({self.to_raw()})" return f"OrgTime({self.to_raw()})"
@classmethod @classmethod
def parse(self, value: str) -> OrgTime: def parse(self, value: str) -> Optional[OrgTime]:
if m := ACTIVE_TIME_STAMP_RE.match(value): if m := ACTIVE_TIME_STAMP_RE.match(value):
active = True active = True
elif m := INACTIVE_TIME_STAMP_RE.match(value): elif m := INACTIVE_TIME_STAMP_RE.match(value):
active = False active = False
else: else:
# raise ArgumentError("Cannot parse `{}` as OrgTime".format(value))
return None return None
repetition = None repetition = None
@ -1219,7 +1236,7 @@ class OrgTime:
) )
def time_from_str(s: str) -> OrgTime: def time_from_str(s: str) -> Optional[OrgTime]:
return OrgTime.parse(s) return OrgTime.parse(s)
@ -1280,7 +1297,7 @@ class Line:
class Link: class Link:
def __init__(self, value: str, description: str, origin: RangeInRaw): def __init__(self, value: str, description: Optional[str], origin: RangeInRaw):
self._value = value self._value = value
self._description = description self._description = description
self._origin = origin self._origin = origin
@ -1292,7 +1309,7 @@ class Link:
return "[[{}]]".format(self.value) return "[[{}]]".format(self.value)
def _update_content(self): def _update_content(self):
new_contents = [] new_contents: List[Union[str, LinkToken]] = []
new_contents.append(self._value) new_contents.append(self._value)
if self._description: if self._description:
new_contents.append(LinkToken(LinkTokenType.OPEN_DESCRIPTION)) new_contents.append(LinkToken(LinkTokenType.OPEN_DESCRIPTION))
@ -1452,7 +1469,7 @@ class Verbatim:
return f"{self.Marker}{raw}{self.Marker}" return f"{self.Marker}{raw}{self.Marker}"
def is_pre(char: str) -> bool: def is_pre(char: Optional[str]) -> bool:
if isinstance(char, str): if isinstance(char, str):
return char in "\n\r\t -({'\"" return char in "\n\r\t -({'\""
else: else:
@ -1494,12 +1511,16 @@ TOKEN_TYPE_OPEN_LINK = 3
TOKEN_TYPE_CLOSE_LINK = 4 TOKEN_TYPE_CLOSE_LINK = 4
TOKEN_TYPE_OPEN_DESCRIPTION = 5 TOKEN_TYPE_OPEN_DESCRIPTION = 5
TokenItems = Union[
Tuple[int, Union[None, str, MarkerToken]],
]
def tokenize_contents(contents: str):
tokens = [] def tokenize_contents(contents: str) -> List[TokenItems]:
tokens: List[TokenItems] = []
last_char = None last_char = None
text = [] text: List[str] = []
closes = set() closes = set()
in_link = False in_link = False
in_link_description = False in_link_description = False
@ -1619,7 +1640,7 @@ def parse_contents(raw_contents: List[RawLine]):
return [] return []
blocks = [] blocks = []
current_block = [] current_block: List[RawLine] = []
for line in raw_contents: for line in raw_contents:
if len(current_block) == 0: if len(current_block) == 0:
@ -1627,6 +1648,7 @@ def parse_contents(raw_contents: List[RawLine]):
current_line = line.linenum current_line = line.linenum
current_block.append(line) current_block.append(line)
else: else:
current_line = cast(int, current_line)
if line.linenum == current_line + 1: if line.linenum == current_line + 1:
# Continue with the current block # Continue with the current block
current_line = line.linenum current_line = line.linenum
@ -1652,21 +1674,24 @@ def parse_content_block(raw_contents: Union[List[RawLine],str]):
for line in raw_contents: for line in raw_contents:
contents_buff.append(line.line) contents_buff.append(line.line)
contents = "\n".join(contents_buff) contents_buff_text = "\n".join(contents_buff)
tokens = tokenize_contents(contents) tokens = tokenize_contents(contents_buff_text)
if isinstance(raw_contents, str): if isinstance(raw_contents, str):
current_line = None current_line = None
else: else:
current_line = raw_contents[0].linenum current_line = raw_contents[0].linenum
contents = [] contents: List[Union[str, MarkerToken, LinkToken]] = []
# Use tokens to tag chunks of text with it's container type # Use tokens to tag chunks of text with it's container type
for (tok_type, tok_val) in tokens: for (tok_type, tok_val) in tokens:
if tok_type == TOKEN_TYPE_TEXT: if tok_type == TOKEN_TYPE_TEXT:
assert isinstance(tok_val, str)
contents.append(tok_val) contents.append(tok_val)
elif tok_type == TOKEN_TYPE_OPEN_MARKER: elif tok_type == TOKEN_TYPE_OPEN_MARKER:
assert isinstance(tok_val, str)
contents.append(MarkerToken(False, MARKERS[tok_val])) contents.append(MarkerToken(False, MARKERS[tok_val]))
elif tok_type == TOKEN_TYPE_CLOSE_MARKER: elif tok_type == TOKEN_TYPE_CLOSE_MARKER:
assert isinstance(tok_val, str)
contents.append(MarkerToken(True, MARKERS[tok_val])) contents.append(MarkerToken(True, MARKERS[tok_val]))
elif tok_type == TOKEN_TYPE_OPEN_LINK: elif tok_type == TOKEN_TYPE_OPEN_LINK:
contents.append(LinkToken(LinkTokenType.OPEN_LINK)) contents.append(LinkToken(LinkTokenType.OPEN_LINK))
@ -1893,7 +1918,7 @@ class OrgDoc:
def getTopHeadlines(self): def getTopHeadlines(self):
return self.headlines return self.headlines
def getAllHeadlines(self) -> Generator[Headline]: def getAllHeadlines(self) -> Iterator[Headline]:
todo = self.headlines[::-1] # We go backwards, to pop/append and go depth-first todo = self.headlines[::-1] # We go backwards, to pop/append and go depth-first
while len(todo) != 0: while len(todo) != 0:
hl = todo.pop() hl = todo.pop()
@ -1918,7 +1943,7 @@ class OrgDoc:
raw_title = token_list_to_raw(headline.title.contents) raw_title = token_list_to_raw(headline.title.contents)
tags_padding = "" tags_padding = ""
if not raw_title.endswith(" ") and tags: if not (raw_title.endswith(" ") or raw_title.endswith("\t")) and tags:
tags_padding = " " tags_padding = " "
yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags yield "*" * headline.depth + headline.spacing + state + raw_title + tags_padding + tags
@ -2016,15 +2041,16 @@ class OrgDoc:
class OrgDocReader: class OrgDocReader:
def __init__(self): def __init__(self):
self.headlines: List[Headline] = [] self.headlines: List[HeadlineDict] = []
self.keywords: List[Property] = [] self.keywords: List[Keyword] = []
self.headline_hierarchy: List[OrgDoc] = [] self.headline_hierarchy: List[HeadlineDict] = []
self.contents: List[RawLine] = [] self.contents: List[RawLine] = []
self.delimiters: List[DelimiterLine] = [] self.delimiters: List[DelimiterLine] = []
self.list_items: List[ListItem] = [] self.list_items: List[ListItem] = []
self.table_rows: List[TableRow] = [] self.table_rows: List[TableRow] = []
self.structural: List = [] self.structural: List = []
self.properties: List = [] self.properties: List = []
self.current_drawer: Optional[List] = None
def finalize(self): def finalize(self):
return OrgDoc( return OrgDoc(
@ -2037,12 +2063,12 @@ class OrgDocReader:
) )
## Construction ## Construction
def add_headline(self, linenum: int, match: re.Match) -> int: def add_headline(self, linenum: int, match: re.Match):
# Position reader on the proper headline # Position reader on the proper headline
stars = match.group("stars") stars = match.group("stars")
depth = len(stars) depth = len(stars)
headline = { headline: HeadlineDict = {
"linenum": linenum, "linenum": linenum,
"orig": match, "orig": match,
"title": match.group("line"), "title": match.group("line"),
@ -2058,27 +2084,35 @@ class OrgDocReader:
"table_rows": [], "table_rows": [],
} }
while (depth - 1) > len(self.headline_hierarchy): headline_hierarchy: List[Optional[HeadlineDict]] = list(self.headline_hierarchy)
while (depth - 1) > len(headline_hierarchy):
# Introduce structural headlines # Introduce structural headlines
self.headline_hierarchy.append(None) headline_hierarchy.append(None)
while depth <= len(self.headline_hierarchy): while depth <= len(headline_hierarchy):
self.headline_hierarchy.pop() headline_hierarchy.pop()
if depth == 1: if depth == 1:
self.headlines.append(headline) self.headlines.append(headline)
else: else:
parent_idx = len(self.headline_hierarchy) - 1 parent_idx = len(headline_hierarchy) - 1
while self.headline_hierarchy[parent_idx] is None: while headline_hierarchy[parent_idx] is None:
parent_idx -= 1 parent_idx -= 1
self.headline_hierarchy[parent_idx]["children"].append(headline) parent_headline = headline_hierarchy[parent_idx]
self.headline_hierarchy.append(headline) assert parent_headline is not None
parent_headline["children"].append(headline)
headline_hierarchy.append(headline)
if all([hl is not None for hl in self.headline_hierarchy]): if all([hl is not None for hl in headline_hierarchy]):
if not ([ len(hl['orig'].group('stars')) for hl in self.headline_hierarchy ] if not ([ len(hl['orig'].group('stars')) for hl in self.headline_hierarchy ]
== list(range(1, len(self.headline_hierarchy) + 1))): == list(range(1, len(self.headline_hierarchy) + 1))):
raise AssertionError('Error on Headline Hierarchy') raise AssertionError('Error on Headline Hierarchy')
else:
raise AssertionError('None found on headline hierarchy')
def add_list_item_line(self, linenum: int, match: re.Match) -> int: self.headline_hierarchy = cast(List[HeadlineDict], headline_hierarchy)
def add_list_item_line(self, linenum: int, match: re.Match) -> ListItem:
li = ListItem( li = ListItem(
linenum=linenum, linenum=linenum,
match=match, match=match,
@ -2103,7 +2137,7 @@ class OrgDocReader:
self.headline_hierarchy[-1]["list_items"].append(li) self.headline_hierarchy[-1]["list_items"].append(li)
return li return li
def add_table_line(self, linenum: int, line: str) -> int: def add_table_line(self, linenum: int, line: str):
chunks = line.split('|') chunks = line.split('|')
indentation = len(chunks[0]) indentation = len(chunks[0])
if chunks[-1].strip() == '': if chunks[-1].strip() == '':
@ -2128,7 +2162,7 @@ class OrgDocReader:
else: else:
self.headline_hierarchy[-1]["table_rows"].append(row) self.headline_hierarchy[-1]["table_rows"].append(row)
def add_keyword_line(self, linenum: int, match: re.Match) -> int: def add_keyword_line(self, linenum: int, match: re.Match):
options = match.group("options") options = match.group("options")
kw = Keyword( kw = Keyword(
linenum, linenum,
@ -2188,22 +2222,19 @@ class OrgDocReader:
else: else:
self.headline_hierarchy[-1]["structural"].append((linenum, line)) self.headline_hierarchy[-1]["structural"].append((linenum, line))
def add_node_properties_line(self, linenum: int, match: re.Match) -> int: def add_node_properties_line(self, linenum: int, match: re.Match):
key = match.group("key") key = match.group("key")
value = match.group("value").strip() value = match.group("value").strip()
if as_time := parse_time(value): if as_time := parse_time(value):
value = as_time value = as_time
try: if self.current_drawer is None: # Throw a better error on this case
self.current_drawer.append(Property(linenum, match, key, value, None))
except Exception:
if "current_drawer" not in dir(self): # Throw a better error on this case
raise Exception( raise Exception(
"Found properties before :PROPERTIES: line. Error on Org file?" "Found properties before :PROPERTIES: line. Error on Org file?"
) )
else:
raise # Let the exception pass self.current_drawer.append(Property(linenum, match, key, value, None))
def read(self, s, environment): def read(self, s, environment):
lines = s.split("\n") lines = s.split("\n")
@ -2316,6 +2347,7 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True):
context_start = i context_start = i
context_last_line = i context_last_line = i
elif context_start: elif context_start:
assert context_last_line is not None
if i > (context_last_line + DEBUG_DIFF_CONTEXT): if i > (context_last_line + DEBUG_DIFF_CONTEXT):
start = max(0, context_start - DEBUG_DIFF_CONTEXT) start = max(0, context_start - DEBUG_DIFF_CONTEXT)
end = min(len(diff), context_last_line + DEBUG_DIFF_CONTEXT) end = min(len(diff), context_last_line + DEBUG_DIFF_CONTEXT)

17
org_rw/types.py Normal file
View File

@ -0,0 +1,17 @@
import re
from typing import List, TypedDict
class HeadlineDict(TypedDict):
    """Dictionary-shaped record of a single headline, as collected while reading.

    ``OrgDocReader.add_headline`` creates one of these per headline line and
    the reader then appends the lines that follow to the matching list of the
    innermost open headline.  The element types of most lists are declared in
    other modules, so they are left as bare ``List`` here.
    """

    # Line number at which the headline was found.
    linenum: int
    # Regex match of the headline line; its "stars" group gives the depth.
    orig: re.Match
    # Text of the headline, taken from the "line" group of ``orig``.
    title: str
    contents: List
    # Nested headlines.  Only HeadlineDict values are ever appended here, so
    # the element type is spelled out to let mypy check nested access.
    children: List["HeadlineDict"]
    keywords: List
    properties: List
    logbook: List
    # Entries are appended as (linenum, line) tuples.
    structural: List
    delimiters: List
    results: List  # TODO: Move to each specific code block?
    # ListItem objects belonging to this headline.
    list_items: List
    # Table row objects belonging to this headline.
    table_rows: List