diff --git a/.gitea/workflows/tests.yaml b/.gitea/workflows/tests.yaml index f56a490..a3adf0a 100644 --- a/.gitea/workflows/tests.yaml +++ b/.gitea/workflows/tests.yaml @@ -9,8 +9,8 @@ jobs: - name: Check out repository code uses: actions/checkout@v3 - run: apt-get update && apt-get install -y python3-pip - - run: pip install -e . - - run: pip install pytest + - run: pip install --break-system-package -e . + - run: pip install --break-system-package pytest - run: pytest mypy: @@ -19,8 +19,8 @@ jobs: - name: Check out repository code uses: actions/checkout@v3 - run: apt-get update && apt-get install -y python3-pip - - run: pip install -e . - - run: pip install mypy + - run: pip install --break-system-package -e . + - run: pip install --break-system-package mypy - run: mypy org_rw --check-untyped-defs style-formatting: @@ -29,8 +29,8 @@ jobs: - name: Check out repository code uses: actions/checkout@v3 - run: apt-get update && apt-get install -y python3-pip - - run: pip install -e . - - run: pip install black + - run: pip install --break-system-package -e . + - run: pip install --break-system-package black - run: black --check . style-sorted-imports: @@ -39,8 +39,8 @@ jobs: - name: Check out repository code uses: actions/checkout@v3 - run: apt-get update && apt-get install -y python3-pip - - run: pip install -e . - - run: pip install isort + - run: pip install --break-system-package -e . + - run: pip install --break-system-package isort - run: isort --profile black --check . stability-extra-test: @@ -49,5 +49,5 @@ jobs: - name: Check out repository code uses: actions/checkout@v3 - run: apt-get update && apt-get install -y git-core python3-pip - - run: pip install -e . + - run: pip install --break-system-package -e . - run: bash extra-tests/check_all.sh diff --git a/README.org b/README.org index 95ec98a..6f03720 100644 --- a/README.org +++ b/README.org @@ -7,6 +7,12 @@ A python library to parse, modify and save Org-mode files. 
- Modify these data and write it back to disk. - Keep the original structure intact (indentation, spaces, format, ...). +** Principles +- Avoid any dependency outside of Python's standard library. +- Don't do anything outside of the scope of parsing/re-serializing Org-mode files. +- *Modification of the original text if there's no change is considered a bug (see [[id:7363ba38-1662-4d3c-9e83-0999824975b7][Known issues]]).* +- Data structures should be exposed as they are read in Emacs's org-mode or, when in doubt, as raw as possible. +- Data in the objects should be modifiable as a way to update the document itself. *Consider this an object-oriented design.* ** Safety mechanism As this library is still in early development. Running it over files might produce unexpected changes on them. For this reason it's heavily recommended to @@ -21,6 +27,9 @@ Also, see [[id:76e77f7f-c9e0-4c83-ad2f-39a5a8894a83][Known issues:Structure modi not properly stored and can trigger this safety mechanism on a false-positive. 
* Known issues +:PROPERTIES: +:ID: 7363ba38-1662-4d3c-9e83-0999824975b7 +:END: ** Structure modifications :PROPERTIES: :ID: 76e77f7f-c9e0-4c83-ad2f-39a5a8894a83 diff --git a/org_rw/dom.py b/org_rw/dom.py index f9ed40f..baf0092 100644 --- a/org_rw/dom.py +++ b/org_rw/dom.py @@ -24,6 +24,14 @@ class ResultsDrawerNode(DrawerNode): return "".format(len(self.children)) +class GenericDrawerNode(DrawerNode): + def __init__(self, drawer_name): + self.drawer_name = drawer_name + + def __repr__(self): + return "".format(self.drawer_name, len(self.children)) + + class PropertyNode: def __init__(self, key, value): self.key = key @@ -62,12 +70,18 @@ class TableSeparatorRow: def __init__(self, orig=None): self.orig = orig + def get_raw(self): + return get_raw_contents(self.orig) + class TableRow: def __init__(self, cells, orig=None): self.cells = cells self.orig = orig + def get_raw(self): + return get_raw_contents(self.orig) + class Text: def __init__(self, content): diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py index f8cf8a8..6baadd1 100644 --- a/org_rw/org_rw.py +++ b/org_rw/org_rw.py @@ -122,6 +122,7 @@ NON_FINISHED_GROUPS = ( dom.ListGroupNode, dom.ResultsDrawerNode, dom.PropertyDrawerNode, + dom.GenericDrawerNode, ) FREE_GROUPS = (dom.CodeBlock,) @@ -414,6 +415,7 @@ class Headline: if ( isinstance(line, DelimiterLine) and line.delimiter_type == DelimiterLineType.END_BLOCK + and line.type_data.subtype == current_node.header.type_data.subtype ): start = current_node.header.linenum @@ -636,6 +638,13 @@ class Headline: assert current_node is None current_node = dom.ResultsDrawerNode() + # TODO: Allow indentation of these blocks inside others + indentation_tree = [current_node] + tree.append(current_node) + elif content.strip().startswith(":") and content.strip().endswith(":"): + assert current_node is None + current_node = dom.GenericDrawerNode(content.strip().strip(":")) + # TODO: Allow indentation of these blocks inside others indentation_tree = [current_node] 
tree.append(current_node) @@ -864,9 +873,24 @@ class Headline: yield from get_links_from_content(item.content) def get_lines_between(self, start, end): - for line in self.contents: + # @TODO: Generalize for other line types too. + everything = ( + [] + # + self.keywords + + self.contents + # + self.list_items + # + self.table_rows + # + self.properties + # + self.structural + + self.delimiters + ) + + for line in everything: if start <= line.linenum < end: - yield "".join(line.get_raw()) + if "get_raw" in dir(line): + yield "".join(line.get_raw()) + else: + yield line.line def get_contents(self, format): if format == "raw": @@ -877,46 +901,6 @@ class Headline: else: raise NotImplementedError() - def update_raw_contents(self, new_contents): - # Clear elements - self.keywords = [] - self.contents = [] - self.list_items = [] - self.table_rows = [] - self.properties = [] - self.structural = [] - self.delimiters = [] - self.scheduled = None - self.deadline = None - self.closed = None - - reader = OrgDocReader(environment=self.doc.environment) - reader.read(new_contents) - - # No need to finalize as we can take the data from the reader instead of from a doc - if len(reader.headlines) > 0: - # Probably can be done by just adding the headlines to this one's children - raise NotImplementedError( - "new headlines on raw contents not supported yet. This probably should be simple, see comment on code." 
- ) - - for kw in reader.keywords: - self.keywords.append(offset_linenum(self.start_line + 1, kw)) - - for content in reader.contents: - self.contents.append(offset_linenum(self.start_line + 1, content)) - - for list_item in reader.list_items: - self.list_items.append(offset_linenum(self.start_line + 1, list_item)) - - for struct_item in reader.structural: - self.structural.append(offset_linenum(self.start_line + 1, struct_item)) - - for prop in reader.properties: - self.properties.append(offset_linenum(self.start_line + 1, prop)) - - # Environment is not used, as it's known - def get_element_in_line(self, linenum): for line in self.contents: if linenum == line.linenum: @@ -1114,7 +1098,6 @@ Keyword = collections.namedtuple( Property = collections.namedtuple( "Property", ("linenum", "match", "key", "value", "options") ) -Structural = collections.namedtuple("Structural", ("linenum", "line")) class ListItem: @@ -1163,19 +1146,6 @@ TableRow = collections.namedtuple( ), ) -ItemWithLineNum = Union[Keyword, RawLine, Property, ListItem, Structural] - - -def offset_linenum(offset: int, item: ItemWithLineNum) -> ItemWithLineNum: - if isinstance(item, ListItem): - item.linenum += offset - return item - - assert isinstance( - item, (Keyword, RawLine, Property, Structural) - ), "Expected (Keyword|RawLine|Property|Structural), found {}".format(item) - return item._replace(linenum=item.linenum + offset) - # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ? 
# @TODO Consider recurrence annotations @@ -2361,6 +2331,7 @@ class OrgDoc: list_items, structural, properties, + delimiters, environment=BASE_ENVIRONMENT, ): self.todo_keywords = [HeadlineState(name=kw) for kw in DEFAULT_TODO_KEYWORDS] @@ -2390,11 +2361,11 @@ class OrgDoc: self.list_items: List[ListItem] = list_items self.structural: List = structural self.properties: List = properties + self.delimiters: List = delimiters self._path = None self.headlines: List[Headline] = list( map(lambda hl: parse_headline(hl, self, self), headlines) ) - self.environment = environment @property def id(self): @@ -2555,6 +2526,9 @@ class OrgDoc: for struct in self.structural: lines.append(dump_structural(struct)) + for content in self.delimiters: + lines.append(dump_delimiters(content)) + for kw in self.keywords: lines.append(dump_kw(kw)) @@ -2579,8 +2553,8 @@ class OrgDocReader: self.delimiters: List[DelimiterLine] = [] self.list_items: List[ListItem] = [] self.table_rows: List[TableRow] = [] - self.structural: List[Structural] = [] - self.properties: List[Property] = [] + self.structural: List = [] + self.properties: List = [] self.current_drawer: Optional[List] = None self.environment = environment @@ -2592,6 +2566,7 @@ class OrgDocReader: self.list_items, self.structural, self.properties, + self.delimiters, self.environment, ) @@ -2762,7 +2737,7 @@ class OrgDocReader: def add_property_drawer_line(self, linenum: int, line: str, match: re.Match): if len(self.headline_hierarchy) == 0: self.current_drawer = self.properties - self.structural.append(Structural(linenum, line)) + self.structural.append((linenum, line)) else: assert self.headline_hierarchy[-1] is not None self.current_drawer = self.headline_hierarchy[-1]["properties"] @@ -2781,7 +2756,7 @@ class OrgDocReader: def add_drawer_end_line(self, linenum: int, line: str, match: re.Match): self.current_drawer = None if len(self.headline_hierarchy) == 0: - self.structural.append(Structural(linenum, line)) + 
self.structural.append((linenum, line)) else: assert self.headline_hierarchy[-1] is not None self.headline_hierarchy[-1]["structural"].append((linenum, line)) diff --git a/org_rw/utils.py b/org_rw/utils.py index 5b8b4e5..87f6712 100644 --- a/org_rw/utils.py +++ b/org_rw/utils.py @@ -9,6 +9,7 @@ from .org_rw import ( ListItem, RawLine, Strike, + TableRow, Text, Underlined, Verbatim, @@ -50,6 +51,8 @@ def get_raw_contents(doc) -> str: return doc.get_raw() if isinstance(doc, ListItem): return dump_contents(doc)[1] + if isinstance(doc, TableRow): + return dump_contents(doc)[1] print("Unhandled type: " + str(doc)) raise NotImplementedError("Unhandled type: " + str(doc)) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1c51c66..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -# No external requirements at this point diff --git a/tests/13-update-reparse-test.org b/tests/13-update-reparse-test.org deleted file mode 100644 index 97eee86..0000000 --- a/tests/13-update-reparse-test.org +++ /dev/null @@ -1,22 +0,0 @@ -#+TITLE: 13-Update reparse -#+DESCRIPTION: Update-Reparse org file -#+TODO: TODO(t) PAUSED(p) | DONE(d) - - -* First level - :PROPERTIES: - :ID: 13-update-reparse-first-level-id - :CREATED: [2020-01-01 Wed 01:01] - :END: - First level content - - - A list of items :: - - With a sublist - - Something after the list. 
- -** Second level - :PROPERTIES: - :ID: 13-update-reparse-second-level-id - :END: - Second level content diff --git a/tests/test_org.py b/tests/test_org.py index 6c6180f..a1fdff1 100644 --- a/tests/test_org.py +++ b/tests/test_org.py @@ -1,5 +1,4 @@ import os -import tempfile import unittest from datetime import datetime as DT @@ -870,86 +869,6 @@ class TestSerde(unittest.TestCase): self.assertEqual(dumps(doc), orig) - def test_update_reparse_same_structure(self): - with open(os.path.join(DIR, "01-simple.org")) as f: - doc = load(f) - - hl = doc.getTopHeadlines()[0] - ex = HL( - "First level", - props=[ - ("ID", "01-simple-first-level-id"), - ("CREATED", DT(2020, 1, 1, 1, 1)), - ], - content=" First level content\n", - children=[ - HL( - "Second level", - props=[("ID", "01-simple-second-level-id")], - content="\n Second level content\n", - children=[ - HL( - "Third level", - props=[("ID", "01-simple-third-level-id")], - content="\n Third level content\n", - ) - ], - ) - ], - ) - - # Ground check - ex.assert_matches(self, hl) - - # Update - lines = list(doc.dump_headline(hl, recursive=False)) - assert lines[0].startswith("* ") # Title, skip it - content = "\n".join(lines[1:]) - hl.update_raw_contents(content) - - # Check after update - ex.assert_matches(self, hl, accept_trailing_whitespace_changes=True) - - def test_update_reparse_same_values(self): - with open(os.path.join(DIR, "13-update-reparse-test.org")) as f: - doc = load(f) - - expected_hl_contents = """ :PROPERTIES: - :ID: 13-update-reparse-first-level-id - :CREATED: [2020-01-01 Wed 01:01] - :END: - First level content - - - A list of items :: - - With a sublist - - Something after the list. 
-""" - - hl = doc.getTopHeadlines()[0] - lines = list(doc.dump_headline(hl, recursive=False)) - assert lines[0].startswith("* ") # Title, skip it - content = "\n".join(lines[1:]) - self.assertEqual(content, expected_hl_contents) - - # Check after update - hl.update_raw_contents(content) - self.assertEqual(content, expected_hl_contents) - - # Check after dump and reload - with tempfile.NamedTemporaryFile("wt") as f: - save = org_rw.dumps(doc) - f.write(save) - f.flush() - - with open(f.name, "rt") as reader: - reloaded = org_rw.load(reader) - re_hl = reloaded.getTopHeadlines()[0] - lines = list(doc.dump_headline(hl, recursive=False)) - assert lines[0].startswith("* ") # Title, skip it - content = "\n".join(lines[1:]) - self.assertEqual(content, expected_hl_contents) - def test_mimic_write_file_13(self): with open(os.path.join(DIR, "13-tags.org")) as f: orig = f.read() diff --git a/tests/utils/assertions.py b/tests/utils/assertions.py index 47ab637..9012d49 100644 --- a/tests/utils/assertions.py +++ b/tests/utils/assertions.py @@ -67,12 +67,7 @@ class HL: self.content = content self.children = children - def assert_matches( - self, - test_case: unittest.TestCase, - doc, - accept_trailing_whitespace_changes=False, - ): + def assert_matches(self, test_case: unittest.TestCase, doc): test_case.assertEqual(self.title, get_raw(doc.title)) # Check properties @@ -89,12 +84,7 @@ class HL: timestamp_to_datetime(doc_props[i].value), prop[1] ) - if accept_trailing_whitespace_changes: - test_case.assertEqual( - get_raw_contents(doc).rstrip(), self.get_raw().rstrip() - ) - else: - test_case.assertEqual(get_raw_contents(doc), self.get_raw()) + test_case.assertEqual(get_raw_contents(doc), self.get_raw()) # Check children if self.children is None: