WIP: Add support for updating a headline whole text contents #4

Draft
kenkeiras wants to merge 11 commits from support-updating-raw-note-contents into develop
11 changed files with 380 additions and 86 deletions
Showing only changes of commit 56416f2fd8 - Show all commits

View File

@ -23,6 +23,26 @@ jobs:
- run: pip install mypy
- run: mypy org_rw --check-untyped-defs
style-formatting:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y python3-pip
- run: pip install -e .
- run: pip install black
- run: black --check .
style-sorted-imports:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y python3-pip
- run: pip install -e .
- run: pip install isort
- run: isort --profile black --check .
stability-extra-test:
runs-on: ubuntu-latest
steps:

View File

@ -41,11 +41,12 @@ class ListGroupNode:
self.children.append(child)
def get_raw(self):
return '\n'.join([c.get_raw() for c in self.children])
return "\n".join([c.get_raw() for c in self.children])
def __repr__(self):
return "<List: {}>".format(len(self.children))
class TableNode:
def __init__(self):
self.children = []
@ -56,21 +57,24 @@ class TableNode:
def __repr__(self):
return "<Table: {}>".format(len(self.children))
class TableSeparatorRow:
def __init__(self, orig=None):
self.orig = orig
class TableRow:
def __init__(self, cells, orig=None):
self.cells = cells
self.orig = orig
class Text:
def __init__(self, content):
self.content = content
def get_raw(self):
return ''.join(self.content.get_raw())
return "".join(self.content.get_raw())
class ListItem:
@ -105,7 +109,9 @@ class CodeBlock(BlockNode):
def __repr__(self):
return "<Code: {}>".format(len(self.lines or []))
DomNode = Union[DrawerNode,
DomNode = Union[
DrawerNode,
PropertyNode,
ListGroupNode,
TableNode,
@ -116,7 +122,8 @@ DomNode = Union[DrawerNode,
BlockNode,
]
ContainerDomNode = Union[DrawerNode,
ContainerDomNode = Union[
DrawerNode,
ListGroupNode,
TableNode,
BlockNode,

View File

@ -1,5 +1,4 @@
from __future__ import annotations
from datetime import timedelta
import collections
import difflib
import logging
@ -8,12 +7,22 @@ import re
import sys
from datetime import date, datetime, timedelta
from enum import Enum
from typing import Any, cast, Iterator, List, Literal, Optional, Tuple, TypedDict, TypeVar, Union
from .types import HeadlineDict
from typing import (
Dict,
Iterator,
List,
Literal,
Optional,
TextIO,
Tuple,
TypedDict,
Union,
cast,
)
from . import dom
from .types import HeadlineDict
DEBUG_DIFF_CONTEXT = 10
@ -22,7 +31,9 @@ DEFAULT_DONE_KEYWORDS = ["DONE"]
BASE_ENVIRONMENT = {
"org-footnote-section": "Footnotes",
"org-todo-keywords": ' '.join(DEFAULT_TODO_KEYWORDS) + ' | ' + ' '.join(DEFAULT_DONE_KEYWORDS),
"org-todo-keywords": " ".join(DEFAULT_TODO_KEYWORDS)
+ " | "
+ " ".join(DEFAULT_DONE_KEYWORDS),
"org-options-keywords": (
"ARCHIVE:",
"AUTHOR:",
@ -92,7 +103,7 @@ PLANNING_RE = re.compile(
r")+\s*"
)
LIST_ITEM_RE = re.compile(
r"(?P<indentation>\s*)((?P<bullet>[*\-+])|((?P<counter>\d|[a-zA-Z])(?P<counter_sep>[.)]))) ((?P<checkbox_indentation>\s*)\[(?P<checkbox_value>[ Xx])\])?((?P<tag_indentation>\s*)(?P<tag>.*?)::)?(?P<content>.*)"
r"(?P<indentation>\s*)((?P<bullet>[*\-+])|((?P<counter>\d|[a-zA-Z])(?P<counter_sep>[.)]))) ((?P<checkbox_indentation>\s*)\[(?P<checkbox_value>[ Xx])\])?((?P<tag_indentation>\s*)((?P<tag>.*?)\s::))?(?P<content>.*)"
)
IMPLICIT_LINK_RE = re.compile(r"(https?:[^<> ]*[a-zA-Z0-9])")
@ -102,7 +113,7 @@ BEGIN_BLOCK_RE = re.compile(r"^\s*#\+BEGIN_(?P<subtype>[^ ]+)(?P<arguments>.*)$"
END_BLOCK_RE = re.compile(r"^\s*#\+END_(?P<subtype>[^ ]+)\s*$", re.I)
RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I)
CodeSnippet = collections.namedtuple(
"CodeSnippet", ("name", "content", "result", "arguments")
"CodeSnippet", ("name", "content", "result", "language", "arguments")
)
# Groupings
@ -114,11 +125,13 @@ NON_FINISHED_GROUPS = (
)
FREE_GROUPS = (dom.CodeBlock,)
# States
class HeadlineState(TypedDict):
# To be extended to handle keyboard shortcuts
name: str
class OrgDocDeclaredStates(TypedDict):
not_completed: List[HeadlineState]
completed: List[HeadlineState]
@ -739,11 +752,20 @@ class Headline:
return times
@property
def tags(self):
if isinstance(self.parent, OrgDoc):
return list(self.shallow_tags)
else:
return list(self.shallow_tags) + self.parent.tags
def tags(self) -> list[str]:
parent_tags = self.parent.tags
if self.doc.environment.get("org-use-tag-inheritance"):
accepted_tags = []
for tag in self.doc.environment.get("org-use-tag-inheritance"):
if tag in parent_tags:
accepted_tags.append(tag)
parent_tags = accepted_tags
elif self.doc.environment.get("org-tags-exclude-from-inheritance"):
for tag in self.doc.environment.get("org-tags-exclude-from-inheritance"):
if tag in parent_tags:
parent_tags.remove(tag)
return list(self.shallow_tags) + parent_tags
def add_tag(self, tag: str):
self.shallow_tags.append(tag)
@ -899,6 +921,12 @@ class Headline:
sections = []
arguments = None
names_by_line = {}
for kw in self.keywords:
if kw.key == "NAME":
names_by_line[kw.linenum] = kw.value
name = None
for delimiter in self.delimiters:
if (
delimiter.delimiter_type == DelimiterLineType.BEGIN_BLOCK
@ -907,6 +935,12 @@ class Headline:
line_start = delimiter.linenum
inside_code = True
arguments = delimiter.arguments
name_line = line_start - 1
if name_line in names_by_line:
name = names_by_line[name_line]
else:
name = None
elif (
delimiter.delimiter_type == DelimiterLineType.END_BLOCK
and delimiter.type_data.subtype.lower() == "src"
@ -921,14 +955,26 @@ class Headline:
# the content parsing must be re-thinked
contents = contents[:-1]
language = None
if arguments is not None:
arguments = arguments.strip()
if " " in arguments:
language = arguments[: arguments.index(" ")]
arguments = arguments[arguments.index(" ") + 1 :]
else:
language = arguments
arguments = None
sections.append(
{
"line_first": start + 1,
"line_last": end - 1,
"content": contents,
"arguments": arguments,
"language": language,
"name": name,
}
)
name = None
arguments = None
line_start = None
@ -977,13 +1023,18 @@ class Headline:
results = []
for section in sections:
name = None
content = section["content"]
code_result = section.get("result", None)
arguments = section.get("arguments", None)
language = section.get("language", None)
name = section.get("name", None)
results.append(
CodeSnippet(
name=name, content=content, result=code_result, arguments=arguments
content=content,
result=code_result,
arguments=arguments,
language=language,
name=name,
)
)
@ -1145,7 +1196,9 @@ class Timestamp:
datetime: The corresponding datetime object.
"""
if self.hour is not None:
return datetime(self.year, self.month, self.day, self.hour, self.minute or 0)
return datetime(
self.year, self.month, self.day, self.hour, self.minute or 0
)
else:
return datetime(self.year, self.month, self.day, 0, 0)
@ -1544,7 +1597,6 @@ class OrgTime:
"""
return self.time.active
@active.setter
def active(self, value: bool) -> None:
"""
@ -1719,7 +1771,7 @@ class Text:
def __repr__(self):
return "{{Text line: {}; content: {} }}".format(self.linenum, self.contents)
def get_text(self):
def get_text(self) -> str:
return token_list_to_plaintext(self.contents)
def get_raw(self):
@ -1949,7 +2001,12 @@ def tokenize_contents(contents: str) -> List[TokenItems]:
continue
# Possible link close or open of description
if char == "]" and len(contents) > i + 1 and in_link:
if (
char == "]"
and len(contents) > i + 1
and in_link
and contents[i + 1] in "]["
):
if contents[i + 1] == "]":
cut_string()
@ -2000,6 +2057,7 @@ def tokenize_contents(contents: str) -> List[TokenItems]:
cut_string()
tokens.append((TOKEN_TYPE_CLOSE_MARKER, char))
has_changed = True
closes.remove(i)
if not has_changed:
text.append(char)
@ -2042,7 +2100,7 @@ def parse_contents(raw_contents: List[RawLine]):
return [parse_content_block(block) for block in blocks]
def parse_content_block(raw_contents: Union[List[RawLine], str]):
def parse_content_block(raw_contents: Union[List[RawLine], str]) -> Text:
contents_buff = []
if isinstance(raw_contents, str):
contents_buff.append(raw_contents)
@ -2128,16 +2186,16 @@ def parse_headline(hl, doc, parent) -> Headline:
title = line
is_done = is_todo = False
for state in doc.todo_keywords or []:
if title.startswith(state['name'] + " "):
if title.startswith(state["name"] + " "):
hl_state = state
title = title[len(state['name'] + " ") :]
title = title[len(state["name"] + " ") :]
is_todo = True
break
else:
for state in doc.done_keywords or []:
if title.startswith(state['name'] + " "):
if title.startswith(state["name"] + " "):
hl_state = state
title = title[len(state['name'] + " ") :]
title = title[len(state["name"] + " ") :]
is_done = True
break
@ -2236,7 +2294,7 @@ def dump_delimiters(line: DelimiterLine):
def parse_todo_done_keywords(line: str) -> OrgDocDeclaredStates:
clean_line = re.sub(r"\([^)]+\)", "", line)
if '|' in clean_line:
if "|" in clean_line:
todo_kws, done_kws = clean_line.split("|", 1)
has_split = True
else:
@ -2251,36 +2309,43 @@ def parse_todo_done_keywords(line: str) -> OrgDocDeclaredStates:
todo_keywords = todo_keywords[:-1]
return {
"not_completed": [
HeadlineState(name=keyword)
for keyword in todo_keywords
],
"completed": [
HeadlineState(name=keyword)
for keyword in done_keywords
],
"not_completed": [HeadlineState(name=keyword) for keyword in todo_keywords],
"completed": [HeadlineState(name=keyword) for keyword in done_keywords],
}
class OrgDoc:
def __init__(
self, headlines, keywords, contents, list_items, structural, properties,
self,
headlines,
keywords,
contents,
list_items,
structural,
properties,
environment=BASE_ENVIRONMENT,
):
self.todo_keywords = [HeadlineState(name=kw) for kw in DEFAULT_TODO_KEYWORDS]
self.done_keywords = [HeadlineState(name=kw) for kw in DEFAULT_DONE_KEYWORDS]
self.environment = environment
keywords_set_in_file = False
for keyword in keywords:
if keyword.key in ("TODO", "SEQ_TODO"):
states = parse_todo_done_keywords(keyword.value)
self.todo_keywords, self.done_keywords = states['not_completed'], states['completed']
self.todo_keywords, self.done_keywords = (
states["not_completed"],
states["completed"],
)
keywords_set_in_file = True
if not keywords_set_in_file and 'org-todo-keywords' in environment:
if not keywords_set_in_file and "org-todo-keywords" in environment:
# Read keywords from environment
states = parse_todo_done_keywords(environment['org-todo-keywords'])
self.todo_keywords, self.done_keywords = states['not_completed'], states['completed']
states = parse_todo_done_keywords(environment["org-todo-keywords"])
self.todo_keywords, self.done_keywords = (
states["not_completed"],
states["completed"],
)
self.keywords: List[Property] = keywords
self.contents: List[RawLine] = contents
@ -2307,6 +2372,17 @@ class OrgDoc:
def path(self):
return self._path
@property
def tags(self) -> list[str]:
for kw in self.keywords:
if kw.key == "FILETAGS":
return kw.value.strip(":").split(":")
return []
@property
def shallow_tags(self) -> list[str]:
return self.tags
## Querying
def get_links(self):
for headline in self.headlines:
@ -2344,7 +2420,7 @@ class OrgDoc:
yield hl
def get_code_snippets(self):
for headline in self.headlines:
for headline in self.getAllHeadlines():
yield from headline.get_code_snippets()
# Writing
@ -2356,7 +2432,7 @@ class OrgDoc:
state = ""
if headline.state:
state = headline.state['name'] + " "
state = headline.state["name"] + " "
raw_title = token_list_to_raw(headline.title.contents)
tags_padding = ""
@ -2470,7 +2546,7 @@ class OrgDocReader:
self.current_drawer: Optional[List] = None
self.environment = environment
def finalize(self):
def finalize(self) -> OrgDoc:
return OrgDoc(
self.headlines,
self.keywords,
@ -2776,7 +2852,26 @@ class OrgDocReader:
raise
def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True):
def loads(
s: str, environment: Optional[Dict] = BASE_ENVIRONMENT, extra_cautious: bool = True
) -> OrgDoc:
"""
Load an Org-mode document from a string.
Args:
s (str): The string representation of the Org-mode document.
environment (Optional[dict]): The environment for parsing. Defaults to
`BASE_ENVIRONMENT`.
extra_cautious (bool): If True, perform an extra check to ensure that
the document can be re-serialized to the original string. Defaults to True.
Returns:
OrgDoc: The loaded Org-mode document.
Raises:
NonReproducibleDocument: If `extra_cautious` is True and there is a
difference between the original string and the re-serialized document.
"""
reader = OrgDocReader(environment)
reader.read(s)
doc = reader.finalize()
@ -2816,20 +2911,55 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True):
return doc
def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
def load(
f: TextIO,
environment: Optional[dict] = BASE_ENVIRONMENT,
extra_cautious: bool = False,
) -> OrgDoc:
"""
Load an Org-mode document from a file object.
Args:
f (TextIO): The file object containing the Org-mode document.
environment (Optional[dict]): The environment for parsing. Defaults to
`BASE_ENVIRONMENT`.
extra_cautious (bool): If True, perform an extra check to ensure that
the document can be re-serialized to the original string. Defaults to False.
Returns:
OrgDoc: The loaded Org-mode document.
"""
doc = loads(f.read(), environment, extra_cautious)
doc._path = os.path.abspath(f.name)
return doc
def dumps(doc):
def dumps(doc: OrgDoc) -> str:
"""
Serialize an OrgDoc object to a string.
Args:
doc (OrgDoc): The OrgDoc object to serialize.
Returns:
str: The serialized string representation of the OrgDoc object.
"""
dump = list(doc.dump())
result = "\n".join(dump)
# print(result)
return result
def dump(doc, fp):
def dump(doc: OrgDoc, fp: TextIO) -> None:
"""
Serialize an OrgDoc object to a file.
Args:
doc (OrgDoc): The OrgDoc object to serialize.
fp (TextIO): The file-like object to write the serialized data to.
Returns:
None
"""
it = doc.dump()
# Write first line separately

0
org_rw/py.typed Normal file
View File

View File

@ -1,9 +1,19 @@
import uuid
from .org_rw import (Bold, Code, Headline, Italic, Line, RawLine, ListItem, Strike, Text,
Underlined, Verbatim)
from .org_rw import dump_contents
from .org_rw import (
Bold,
Code,
Headline,
Italic,
Line,
ListItem,
RawLine,
Strike,
Text,
Underlined,
Verbatim,
dump_contents,
)
def get_hl_raw_contents(doc: Headline) -> str:

11
scripts/apply-formatting.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/sh
set -eu
cd "`dirname $0`"
cd ..
set -x
isort --profile black .
black .

View File

@ -9,6 +9,7 @@
:CREATED: [2020-01-01 Wed 01:01]
:END:
#+NAME: first-code-name
#+BEGIN_SRC shell :results verbatim
echo "This is a test"
echo "with two lines"

13
tests/13-tags.org Normal file
View File

@ -0,0 +1,13 @@
#+TITLE: 13-Tags
#+DESCRIPTION: Simple org file to test tags
#+FILETAGS: :filetag:
* Level 1 :h1tag:
:PROPERTIES:
:ID: 13-tags
:CREATED: [2020-01-01 Wed 01:01]
:END:
** Level2 :h2tag:
* Level 1-1 :otherh1tag:
** Level2 :otherh2tag:

View File

@ -3,9 +3,6 @@ import tempfile
import unittest
from datetime import datetime as DT
from org_rw import MarkerToken, MarkerType, Timestamp, dumps, load, loads, dom
import org_rw
from utils.assertions import (
BOLD,
CODE,
@ -20,6 +17,9 @@ from utils.assertions import (
Tokens,
)
import org_rw
from org_rw import MarkerToken, MarkerType, Timestamp, dom, dumps, load, loads
DIR = os.path.dirname(os.path.abspath(__file__))
@ -481,20 +481,22 @@ class TestSerde(unittest.TestCase):
snippets = list(doc.get_code_snippets())
self.assertEqual(len(snippets), 3)
self.assertEqual(snippets[0].name, "first-code-name")
self.assertEqual(snippets[0].language, "shell")
self.assertEqual(
snippets[0].content,
'echo "This is a test"\n'
+ 'echo "with two lines"\n'
+ "exit 0 # Exit successfully",
)
self.assertEqual(
snippets[0].arguments.split(), ["shell", ":results", "verbatim"]
)
self.assertEqual(snippets[0].arguments.split(), [":results", "verbatim"])
self.assertEqual(
snippets[0].result,
"This is a test\n" + "with two lines",
)
self.assertEqual(snippets[1].name, None)
self.assertEqual(snippets[1].language, "shell")
self.assertEqual(
snippets[1].content,
'echo "This is another test"\n'
@ -505,6 +507,8 @@ class TestSerde(unittest.TestCase):
snippets[1].result, "This is another test\n" + "with two lines too"
)
self.assertEqual(snippets[2].name, None)
self.assertEqual(snippets[2].language, "c")
self.assertEqual(
snippets[2].content,
"/* This code has to be escaped to\n"
@ -835,12 +839,12 @@ class TestSerde(unittest.TestCase):
self.assertEqual(dumps(doc), orig)
def test_add_todo_keywords_programatically(self):
orig = '''* NEW_TODO_STATE First entry
orig = """* NEW_TODO_STATE First entry
* NEW_DONE_STATE Second entry'''
doc = loads(orig, environment={
'org-todo-keywords': "NEW_TODO_STATE | NEW_DONE_STATE"
})
* NEW_DONE_STATE Second entry"""
doc = loads(
orig, environment={"org-todo-keywords": "NEW_TODO_STATE | NEW_DONE_STATE"}
)
self.assertEqual(doc.headlines[0].is_todo, True)
self.assertEqual(doc.headlines[0].is_done, False)
@ -850,14 +854,14 @@ class TestSerde(unittest.TestCase):
self.assertEqual(dumps(doc), orig)
def test_add_todo_keywords_in_file(self):
orig = '''#+TODO: NEW_TODO_STATE | NEW_DONE_STATE
orig = """#+TODO: NEW_TODO_STATE | NEW_DONE_STATE
* NEW_TODO_STATE First entry
* NEW_DONE_STATE Second entry'''
doc = loads(orig, environment={
'org-todo-keywords': "NEW_TODO_STATE | NEW_DONE_STATE"
})
* NEW_DONE_STATE Second entry"""
doc = loads(
orig, environment={"org-todo-keywords": "NEW_TODO_STATE | NEW_DONE_STATE"}
)
self.assertEqual(doc.headlines[0].is_todo, True)
self.assertEqual(doc.headlines[0].is_done, False)
@ -946,6 +950,93 @@ class TestSerde(unittest.TestCase):
content = '\n'.join(lines[1:])
self.assertEqual(content, expected_hl_contents)
def test_mimic_write_file_13(self):
with open(os.path.join(DIR, "13-tags.org")) as f:
orig = f.read()
doc = loads(orig)
self.assertEqual(dumps(doc), orig)
def test_tag_property_read_13(self):
with open(os.path.join(DIR, "13-tags.org")) as f:
orig = f.read()
doc = loads(orig)
self.assertEqual(doc.tags, ["filetag"])
h1_1, h1_2 = doc.getTopHeadlines()
self.assertEqual(sorted(h1_1.tags), ["filetag", "h1tag"])
self.assertEqual(sorted(h1_2.tags), ["filetag", "otherh1tag"])
h1_1_h2 = h1_1.children[0]
self.assertEqual(sorted(h1_1_h2.tags), ["filetag", "h1tag", "h2tag"])
h1_2_h2 = h1_2.children[0]
self.assertEqual(sorted(h1_2_h2.tags), ["filetag", "otherh1tag", "otherh2tag"])
def test_shallow_tag_property_read_13(self):
with open(os.path.join(DIR, "13-tags.org")) as f:
orig = f.read()
doc = loads(orig)
self.assertEqual(doc.shallow_tags, ["filetag"])
h1_1, h1_2 = doc.getTopHeadlines()
self.assertEqual(sorted(h1_1.shallow_tags), ["h1tag"])
self.assertEqual(sorted(h1_2.shallow_tags), ["otherh1tag"])
h1_1_h2 = h1_1.children[0]
self.assertEqual(sorted(h1_1_h2.shallow_tags), ["h2tag"])
h1_2_h2 = h1_2.children[0]
self.assertEqual(sorted(h1_2_h2.shallow_tags), ["otherh2tag"])
def test_exclude_tags_from_inheritance_property_read_13(self):
with open(os.path.join(DIR, "13-tags.org")) as f:
orig = f.read()
doc = loads(
orig,
{
"org-tags-exclude-from-inheritance": ("h1tag", "otherh2tag"),
},
)
self.assertEqual(doc.tags, ["filetag"])
h1_1, h1_2 = doc.getTopHeadlines()
self.assertEqual(sorted(h1_1.tags), ["filetag", "h1tag"])
self.assertEqual(sorted(h1_2.tags), ["filetag", "otherh1tag"])
h1_1_h2 = h1_1.children[0]
self.assertEqual(sorted(h1_1_h2.tags), ["filetag", "h2tag"])
h1_2_h2 = h1_2.children[0]
self.assertEqual(sorted(h1_2_h2.tags), ["filetag", "otherh1tag", "otherh2tag"])
def test_select_tags_to_inheritance_property_read_13(self):
with open(os.path.join(DIR, "13-tags.org")) as f:
orig = f.read()
doc = loads(
orig,
{
"org-tags-exclude-from-inheritance": ("h1tag", "otherh2tag"),
"org-use-tag-inheritance": ("h1tag",),
},
)
self.assertEqual(doc.tags, ["filetag"])
h1_1, h1_2 = doc.getTopHeadlines()
self.assertEqual(sorted(h1_1.tags), ["h1tag"])
self.assertEqual(sorted(h1_2.tags), ["otherh1tag"])
h1_1_h2 = h1_1.children[0]
self.assertEqual(sorted(h1_1_h2.tags), ["h1tag", "h2tag"])
h1_2_h2 = h1_2.children[0]
self.assertEqual(sorted(h1_2_h2.tags), ["otherh2tag"])
def print_tree(tree, indentation=0, headline=None):
for element in tree:
print(" " * indentation * 2, "EL:", element)

View File

@ -1,7 +1,9 @@
"""Test the Timestamp object."""
import pytest
from datetime import date, datetime
import pytest
from org_rw import Timestamp

View File

@ -2,8 +2,17 @@ import collections
import unittest
from datetime import datetime
from org_rw import (Bold, Code, Italic, Line, Strike, Text, Underlined,
Verbatim, get_raw_contents)
from org_rw import (
Bold,
Code,
Italic,
Line,
Strike,
Text,
Underlined,
Verbatim,
get_raw_contents,
)
def timestamp_to_datetime(ts):