Compare commits

..

10 Commits

Author SHA1 Message Date
Sergio Martínez Portela
55fc87cfdc Add absence of dependencies as principle.
All checks were successful
Testing / pytest (push) Successful in 20s
Testing / mypy (push) Successful in 29s
Testing / style-formatting (push) Successful in 19s
Testing / style-sorted-imports (push) Successful in 16s
Testing / stability-extra-test (push) Successful in 22s
2025-04-16 01:00:09 +02:00
Sergio Martínez Portela
f936bccf7f doc: Add a small "Principles" section to README.
All checks were successful
Testing / pytest (push) Successful in 17s
Testing / mypy (push) Successful in 22s
Testing / style-formatting (push) Successful in 20s
Testing / style-sorted-imports (push) Successful in 19s
Testing / stability-extra-test (push) Successful in 21s
2025-04-16 00:46:52 +02:00
78bd091e61 Merge pull request 'Multiple fixes on loader due to extended tests.' (#15) from fixes/loading into develop
All checks were successful
Testing / pytest (push) Successful in 20s
Testing / mypy (push) Successful in 27s
Testing / style-formatting (push) Successful in 23s
Testing / style-sorted-imports (push) Successful in 19s
Testing / stability-extra-test (push) Successful in 23s
Reviewed-on: #15
2025-04-15 21:56:51 +00:00
Sergio Martínez Portela
3b90723250 format: Automatic formatting fixes.
All checks were successful
Testing / pytest (push) Successful in 23s
Testing / mypy (push) Successful in 28s
Testing / style-formatting (push) Successful in 23s
Testing / style-sorted-imports (push) Successful in 19s
Testing / stability-extra-test (push) Successful in 26s
2025-02-09 16:50:52 +01:00
Sergio Martínez Portela
506a17dc5c fix(org_rw): Ensure closing delimiters are same subtype as openers. 2025-02-09 16:50:52 +01:00
Sergio Martínez Portela
0bdb29a278 Don't cut delimiter lines out of get_lines_between(). 2025-02-09 16:50:52 +01:00
Sergio Martínez Portela
8b4e12ea2e Add dom.TableRow.get_raw() support. 2025-02-09 16:50:52 +01:00
Sergio Martínez Portela
dbac8b2d6e feat(dom): Add support for generic drawer outputs. 2025-02-09 16:50:52 +01:00
Sergio Martínez Portela
c0fc78fe33 fix(gitea): Fix build with newer images. 2025-02-09 14:13:28 +01:00
Sergio Martínez Portela
9c04717a12 Fix support of code blocks outside headlines.
Some checks failed
Testing / pytest (push) Failing after 1m11s
Testing / mypy (push) Failing after 17s
Testing / style-formatting (push) Failing after 15s
Testing / style-sorted-imports (push) Failing after 16s
Testing / stability-extra-test (push) Failing after 20s
2025-02-09 13:49:09 +01:00
9 changed files with 73 additions and 186 deletions

View File

@ -9,8 +9,8 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y python3-pip
- run: pip install -e .
- run: pip install pytest
- run: pip install --break-system-package -e .
- run: pip install --break-system-package pytest
- run: pytest
mypy:
@ -19,8 +19,8 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y python3-pip
- run: pip install -e .
- run: pip install mypy
- run: pip install --break-system-package -e .
- run: pip install --break-system-package mypy
- run: mypy org_rw --check-untyped-defs
style-formatting:
@ -29,8 +29,8 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y python3-pip
- run: pip install -e .
- run: pip install black
- run: pip install --break-system-package -e .
- run: pip install --break-system-package black
- run: black --check .
style-sorted-imports:
@ -39,8 +39,8 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y python3-pip
- run: pip install -e .
- run: pip install isort
- run: pip install --break-system-package -e .
- run: pip install --break-system-package isort
- run: isort --profile black --check .
stability-extra-test:
@ -49,5 +49,5 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v3
- run: apt-get update && apt-get install -y git-core python3-pip
- run: pip install -e .
- run: pip install --break-system-package -e .
- run: bash extra-tests/check_all.sh

View File

@ -7,6 +7,12 @@ A python library to parse, modify and save Org-mode files.
- Modify these data and write it back to disk.
- Keep the original structure intact (indentation, spaces, format, ...).
** Principles
- Avoid any dependency outside of Python's standard library.
- Don't do anything outside of the scope of parsing/re-serializing Org-mode files.
- *Modification of the original text if there's no change is considered a bug (see [[id:7363ba38-1662-4d3c-9e83-0999824975b7][Known issues]]).*
- Data structures should be exposed as they are read in Emacs's Org-mode or, when in doubt, as raw as possible.
- Data in the objects should be modifiable as a way to update the document itself. *Consider this an object-oriented design.*
** Safety mechanism
As this library is still in early development, running it over files might
produce unexpected changes in them. For this reason it's strongly recommended to
@ -21,6 +27,9 @@ Also, see [[id:76e77f7f-c9e0-4c83-ad2f-39a5a8894a83][Known issues:Structure modi
not properly stored and can trigger this safety mechanism on a false-positive.
* Known issues
:PROPERTIES:
:ID: 7363ba38-1662-4d3c-9e83-0999824975b7
:END:
** Structure modifications
:PROPERTIES:
:ID: 76e77f7f-c9e0-4c83-ad2f-39a5a8894a83

View File

@ -24,6 +24,14 @@ class ResultsDrawerNode(DrawerNode):
return "<Results: {}>".format(len(self.children))
class GenericDrawerNode(DrawerNode):
    """Drawer node that carries the drawer's own name.

    The parser builds one of these from a line that starts and ends with
    ``:``, passing the line's text with the surrounding colons stripped as
    ``drawer_name``.
    """

    def __init__(self, drawer_name):
        # Run the base initializer first so inherited state exists;
        # __repr__ below reads `self.children`, which this class never sets.
        # NOTE(review): presumably DrawerNode.__init__ creates `children` --
        # the base class is not visible here, confirm.
        super().__init__()
        self.drawer_name = drawer_name

    def __repr__(self):
        return "<Drawer{}: {}>".format(self.drawer_name, len(self.children))
class PropertyNode:
def __init__(self, key, value):
self.key = key
@ -62,12 +70,18 @@ class TableSeparatorRow:
def __init__(self, orig=None):
self.orig = orig
def get_raw(self):
return get_raw_contents(self.orig)
class TableRow:
    """DOM node for a table row: its cells plus the item it was built from."""

    def __init__(self, cells, orig=None):
        self.orig = orig
        self.cells = cells

    def get_raw(self):
        """Return the raw contents of `orig` as produced by get_raw_contents()."""
        raw = get_raw_contents(self.orig)
        return raw
class Text:
def __init__(self, content):

View File

@ -122,6 +122,7 @@ NON_FINISHED_GROUPS = (
dom.ListGroupNode,
dom.ResultsDrawerNode,
dom.PropertyDrawerNode,
dom.GenericDrawerNode,
)
FREE_GROUPS = (dom.CodeBlock,)
@ -414,6 +415,7 @@ class Headline:
if (
isinstance(line, DelimiterLine)
and line.delimiter_type == DelimiterLineType.END_BLOCK
and line.type_data.subtype == current_node.header.type_data.subtype
):
start = current_node.header.linenum
@ -636,6 +638,13 @@ class Headline:
assert current_node is None
current_node = dom.ResultsDrawerNode()
# TODO: Allow indentation of these blocks inside others
indentation_tree = [current_node]
tree.append(current_node)
elif content.strip().startswith(":") and content.strip().endswith(":"):
assert current_node is None
current_node = dom.GenericDrawerNode(content.strip().strip(":"))
# TODO: Allow indentation of these blocks inside others
indentation_tree = [current_node]
tree.append(current_node)
@ -864,9 +873,24 @@ class Headline:
yield from get_links_from_content(item.content)
def get_lines_between(self, start, end):
for line in self.contents:
# @TODO: Generalize for other line types too.
everything = (
[]
# + self.keywords
+ self.contents
# + self.list_items
# + self.table_rows
# + self.properties
# + self.structural
+ self.delimiters
)
for line in everything:
if start <= line.linenum < end:
yield "".join(line.get_raw())
if "get_raw" in dir(line):
yield "".join(line.get_raw())
else:
yield line.line
def get_contents(self, format):
if format == "raw":
@ -877,46 +901,6 @@ class Headline:
else:
raise NotImplementedError()
def update_raw_contents(self, new_contents):
    """Replace this headline's parsed body by re-parsing `new_contents`.

    All per-line collections and the planning fields are reset, then
    `new_contents` is read with a fresh OrgDocReader that shares the
    document's environment. The keywords, contents, list items, structural
    lines and properties found by the reader are copied back, with their
    line numbers shifted by `self.start_line + 1`.

    NOTE(review): `table_rows` and `delimiters` are cleared but not
    repopulated in this method, and `scheduled`/`deadline`/`closed` stay
    None after the call.

    Raises:
        NotImplementedError: if `new_contents` contains any headline.
    """
    # Clear elements
    self.keywords = []
    self.contents = []
    self.list_items = []
    self.table_rows = []
    self.properties = []
    self.structural = []
    self.delimiters = []
    self.scheduled = None
    self.deadline = None
    self.closed = None
    reader = OrgDocReader(environment=self.doc.environment)
    reader.read(new_contents)
    # No need to finalize as we can take the data from the reader instead of from a doc
    if len(reader.headlines) > 0:
        # Probably can be done by just adding the headlines to this one's children
        raise NotImplementedError(
            "new headlines on raw contents not supported yet. This probably should be simple, see comment on code."
        )
    # Copy each parsed item back, shifting its line number by
    # `self.start_line + 1`.
    for kw in reader.keywords:
        self.keywords.append(offset_linenum(self.start_line + 1, kw))
    for content in reader.contents:
        self.contents.append(offset_linenum(self.start_line + 1, content))
    for list_item in reader.list_items:
        self.list_items.append(offset_linenum(self.start_line + 1, list_item))
    for struct_item in reader.structural:
        self.structural.append(offset_linenum(self.start_line + 1, struct_item))
    for prop in reader.properties:
        self.properties.append(offset_linenum(self.start_line + 1, prop))
    # Environment is not used, as it's known
def get_element_in_line(self, linenum):
for line in self.contents:
if linenum == line.linenum:
@ -1114,7 +1098,6 @@ Keyword = collections.namedtuple(
Property = collections.namedtuple(
"Property", ("linenum", "match", "key", "value", "options")
)
Structural = collections.namedtuple("Structural", ("linenum", "line"))
class ListItem:
@ -1163,19 +1146,6 @@ TableRow = collections.namedtuple(
),
)
ItemWithLineNum = Union[Keyword, RawLine, Property, ListItem, Structural]
def offset_linenum(offset: int, item: ItemWithLineNum) -> ItemWithLineNum:
    """Return `item` with its line number moved forward by `offset`.

    ListItem instances are updated in place and returned as-is; every other
    accepted type gets a copy made with `_replace`.
    """
    if not isinstance(item, ListItem):
        assert isinstance(
            item, (Keyword, RawLine, Property, Structural)
        ), "Expected (Keyword|RawLine|Property|Structural), found {}".format(item)
        shifted = item.linenum + offset
        return item._replace(linenum=shifted)

    # ListItem is mutable, so adjust it directly.
    item.linenum += offset
    return item
# @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
# @TODO Consider recurrence annotations
@ -2361,6 +2331,7 @@ class OrgDoc:
list_items,
structural,
properties,
delimiters,
environment=BASE_ENVIRONMENT,
):
self.todo_keywords = [HeadlineState(name=kw) for kw in DEFAULT_TODO_KEYWORDS]
@ -2390,11 +2361,11 @@ class OrgDoc:
self.list_items: List[ListItem] = list_items
self.structural: List = structural
self.properties: List = properties
self.delimiters: List = delimiters
self._path = None
self.headlines: List[Headline] = list(
map(lambda hl: parse_headline(hl, self, self), headlines)
)
self.environment = environment
@property
def id(self):
@ -2555,6 +2526,9 @@ class OrgDoc:
for struct in self.structural:
lines.append(dump_structural(struct))
for content in self.delimiters:
lines.append(dump_delimiters(content))
for kw in self.keywords:
lines.append(dump_kw(kw))
@ -2579,8 +2553,8 @@ class OrgDocReader:
self.delimiters: List[DelimiterLine] = []
self.list_items: List[ListItem] = []
self.table_rows: List[TableRow] = []
self.structural: List[Structural] = []
self.properties: List[Property] = []
self.structural: List = []
self.properties: List = []
self.current_drawer: Optional[List] = None
self.environment = environment
@ -2592,6 +2566,7 @@ class OrgDocReader:
self.list_items,
self.structural,
self.properties,
self.delimiters,
self.environment,
)
@ -2762,7 +2737,7 @@ class OrgDocReader:
def add_property_drawer_line(self, linenum: int, line: str, match: re.Match):
if len(self.headline_hierarchy) == 0:
self.current_drawer = self.properties
self.structural.append(Structural(linenum, line))
self.structural.append((linenum, line))
else:
assert self.headline_hierarchy[-1] is not None
self.current_drawer = self.headline_hierarchy[-1]["properties"]
@ -2781,7 +2756,7 @@ class OrgDocReader:
def add_drawer_end_line(self, linenum: int, line: str, match: re.Match):
self.current_drawer = None
if len(self.headline_hierarchy) == 0:
self.structural.append(Structural(linenum, line))
self.structural.append((linenum, line))
else:
assert self.headline_hierarchy[-1] is not None
self.headline_hierarchy[-1]["structural"].append((linenum, line))

View File

@ -9,6 +9,7 @@ from .org_rw import (
ListItem,
RawLine,
Strike,
TableRow,
Text,
Underlined,
Verbatim,
@ -50,6 +51,8 @@ def get_raw_contents(doc) -> str:
return doc.get_raw()
if isinstance(doc, ListItem):
return dump_contents(doc)[1]
if isinstance(doc, TableRow):
return dump_contents(doc)[1]
print("Unhandled type: " + str(doc))
raise NotImplementedError("Unhandled type: " + str(doc))

View File

@ -1 +0,0 @@
# No external requirements at this point

View File

@ -1,22 +0,0 @@
#+TITLE: 13-Update reparse
#+DESCRIPTION: Update-Reparse org file
#+TODO: TODO(t) PAUSED(p) | DONE(d)
* First level
:PROPERTIES:
:ID: 13-update-reparse-first-level-id
:CREATED: [2020-01-01 Wed 01:01]
:END:
First level content
- A list of items ::
- With a sublist
Something after the list.
** Second level
:PROPERTIES:
:ID: 13-update-reparse-second-level-id
:END:
Second level content

View File

@ -1,5 +1,4 @@
import os
import tempfile
import unittest
from datetime import datetime as DT
@ -870,86 +869,6 @@ class TestSerde(unittest.TestCase):
self.assertEqual(dumps(doc), orig)
def test_update_reparse_same_structure(self):
with open(os.path.join(DIR, "01-simple.org")) as f:
doc = load(f)
hl = doc.getTopHeadlines()[0]
ex = HL(
"First level",
props=[
("ID", "01-simple-first-level-id"),
("CREATED", DT(2020, 1, 1, 1, 1)),
],
content=" First level content\n",
children=[
HL(
"Second level",
props=[("ID", "01-simple-second-level-id")],
content="\n Second level content\n",
children=[
HL(
"Third level",
props=[("ID", "01-simple-third-level-id")],
content="\n Third level content\n",
)
],
)
],
)
# Ground check
ex.assert_matches(self, hl)
# Update
lines = list(doc.dump_headline(hl, recursive=False))
assert lines[0].startswith("* ") # Title, skip it
content = "\n".join(lines[1:])
hl.update_raw_contents(content)
# Check after update
ex.assert_matches(self, hl, accept_trailing_whitespace_changes=True)
def test_update_reparse_same_values(self):
with open(os.path.join(DIR, "13-update-reparse-test.org")) as f:
doc = load(f)
expected_hl_contents = """ :PROPERTIES:
:ID: 13-update-reparse-first-level-id
:CREATED: [2020-01-01 Wed 01:01]
:END:
First level content
- A list of items ::
- With a sublist
Something after the list.
"""
hl = doc.getTopHeadlines()[0]
lines = list(doc.dump_headline(hl, recursive=False))
assert lines[0].startswith("* ") # Title, skip it
content = "\n".join(lines[1:])
self.assertEqual(content, expected_hl_contents)
# Check after update
hl.update_raw_contents(content)
self.assertEqual(content, expected_hl_contents)
# Check after dump and reload
with tempfile.NamedTemporaryFile("wt") as f:
save = org_rw.dumps(doc)
f.write(save)
f.flush()
with open(f.name, "rt") as reader:
reloaded = org_rw.load(reader)
re_hl = reloaded.getTopHeadlines()[0]
lines = list(doc.dump_headline(hl, recursive=False))
assert lines[0].startswith("* ") # Title, skip it
content = "\n".join(lines[1:])
self.assertEqual(content, expected_hl_contents)
def test_mimic_write_file_13(self):
with open(os.path.join(DIR, "13-tags.org")) as f:
orig = f.read()

View File

@ -67,12 +67,7 @@ class HL:
self.content = content
self.children = children
def assert_matches(
self,
test_case: unittest.TestCase,
doc,
accept_trailing_whitespace_changes=False,
):
def assert_matches(self, test_case: unittest.TestCase, doc):
test_case.assertEqual(self.title, get_raw(doc.title))
# Check properties
@ -89,12 +84,7 @@ class HL:
timestamp_to_datetime(doc_props[i].value), prop[1]
)
if accept_trailing_whitespace_changes:
test_case.assertEqual(
get_raw_contents(doc).rstrip(), self.get_raw().rstrip()
)
else:
test_case.assertEqual(get_raw_contents(doc), self.get_raw())
test_case.assertEqual(get_raw_contents(doc), self.get_raw())
# Check children
if self.children is None: