From fe454bd85e3c37c823d842184e5fb014801d5fed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?=
Date: Wed, 10 Feb 2021 00:21:37 +0100
Subject: [PATCH] Draft very basic list parser.

---
Review notes (this section is not part of the commit message):

* LIST_ITEM_RE: the nine named groups are spelled out again. Their names
  and order follow the match.group("...") calls in
  OrgDocReader.add_list_item_line and the ListItem field order.
* NOTE(review): the checkbox_indentation group is empty as found, so a
  checkbox separated from the bullet by a space ("- [ ] x") ends up in
  "content" instead of "checkbox_value". Confirm whether that is intended.
* add_list_item_line returns nothing, so it is annotated "-> None".
* tests/06-lists.org: header says "Lists" instead of "Links".
* NOTE(review): the From: address, the PLANNING_RE and BEGIN_SRC_RE context
  lines and the removed BASE_TIME_RANGE_RE comment are reproduced as found.
  They appear to have lost text between angle brackets and must be checked
  against the target file before applying.
* Leading whitespace and blank lines were reconstructed from the hunk line
  counts. Hunk offsets and the diffstat account for the added comment and
  docstring lines; the index hashes were not recomputed.

 org_rw/org_rw.py   | 92 ++++++++++++++++++++++++++++++++++++++++++++---
 tests/06-lists.org | 39 +++++++++++++++++++
 tests/test_org.py  | 17 ++++++++
 3 files changed, 142 insertions(+), 6 deletions(-)
 create mode 100644 tests/06-lists.org

diff --git a/org_rw/org_rw.py b/org_rw/org_rw.py
index 77a6eb3..625d67a 100644
--- a/org_rw/org_rw.py
+++ b/org_rw/org_rw.py
@@ -83,6 +83,11 @@ PLANNING_RE = re.compile(
     + r"[>\]])?)\s*"
     r")+\s*"
 )
+# List item line: indentation, then a bullet (*, -, +) or a one-character
+# counter followed by "." or ")", an optional checkbox, an optional "tag ::".
+LIST_ITEM_RE = re.compile(
+    r"(?P<indentation>\s*)((?P<bullet>[*\-+])|((?P<counter>\d|[a-zA-Z])(?P<counter_sep>[.)])))((?P<checkbox_indentation>)\[(?P<checkbox_value>[ Xx])\])?((?P<tag_indentation>\s*)(?P<tag>.*?)::)?(?P<content>.*)"
+)
 
 # Org-Babel
 BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(?P.*)$", re.I)
@@ -90,9 +95,6 @@ END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$", re.I)
 RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I)
 CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result"))
 
-# BASE_TIME_RANGE_RE = (r'(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P[^ ]+)((?P\d{1,2}):(?P\d{1,2}))?',
-#                       r'(?P\d{4})-(?P\d{2})-(?P\d{2}) (?P[^ ]+)((?P\d{1,2}):(?P\d{1,2}))?')
-
 
 def get_tokens(value):
     if isinstance(value, Text):
@@ -182,6 +184,7 @@ class Headline:
         children,
         structural,
         delimiters,
+        list_items,
         parent,
         is_todo,
         is_done,
@@ -203,6 +206,7 @@ class Headline:
         self.children = children
         self.structural = structural
         self.delimiters = delimiters
+        self.list_items = list_items
         self.parent = parent
         self.is_todo = is_todo
         self.is_done = is_done
@@ -243,6 +247,26 @@ class Headline:
         # Remove from contents
         self._remove_element_in_line(start_line + 1)
 
+    def getLists(self):
+        """Group the headline's list items into separate lists.
+
+        Items on consecutive lines belong to the same list; any gap in the
+        line numbers starts a new one.
+
+        Returns a list of lists of ListItem, in self.list_items order.
+        """
+        lists = []
+        last_line = None
+
+        for li in self.list_items:
+            if last_line == li.linenum - 1:
+                lists[-1].append(li)
+            else:
+                lists.append([li])
+
+            last_line = li.linenum
+        return lists
+
     def get_planning_line(self):
         if self.scheduled is None and self.closed is None and self.deadline is None:
             return None
@@ -455,6 +479,24 @@ Property = collections.namedtuple(
     "Property", ("linenum", "match", "key", "value", "options")
 )
 
+# A parsed list item line; the fields after "match" hold LIST_ITEM_RE's named groups.
+ListItem = collections.namedtuple(
+    "ListItem",
+    (
+        "linenum",
+        "match",
+        "indentation",
+        "bullet",
+        "counter",
+        "counter_sep",
+        "checkbox_indentation",
+        "checkbox_value",
+        "tag_indentation",
+        "tag",
+        "content",
+    ),
+)
+
 # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
 # @TODO Consider recurrence annotations
 class Timestamp:
@@ -1117,6 +1159,9 @@ def dump_contents(raw):
     if isinstance(raw, RawLine):
         return (raw.linenum, raw.line)
 
+    elif isinstance(raw, ListItem):
+        return (raw.linenum, raw.match.group(0))
+
     return (raw.linenum, raw.get_raw())
 
 
@@ -1166,6 +1211,7 @@ def parse_headline(hl, doc, parent) -> Headline:
         properties=hl["properties"],
         structural=hl["structural"],
         delimiters=hl["delimiters"],
+        list_items=hl["list_items"],
         title_start=None,
         priority=None,
         priority_start=None,
@@ -1305,6 +1351,9 @@ class OrgDoc:
         for content in headline.contents:
             lines.append((CONTENT_T, dump_contents(content)))
 
+        for li in headline.list_items:
+            lines.append((CONTENT_T, dump_contents(li)))
+
         for prop in headline.properties:
             lines.append((PROPERTIES_T, self.dump_property(prop)))
 
@@ -1378,6 +1427,7 @@ class OrgDocReader:
         self.headline_hierarchy: List[OrgDoc] = []
         self.contents: List[RawLine] = []
         self.delimiters: List[DelimiterLine] = []
+        self.list_items: List[ListItem] = []
 
     def finalize(self):
         return OrgDoc(self.headlines, self.keywords, self.contents)
@@ -1400,6 +1450,7 @@ class OrgDocReader:
             "structural": [],
             "delimiters": [],
             "results": [],  # TODO: Move to each specific code block?
+            "list_items": [],
         }
 
         while (depth - 2) > len(self.headline_hierarchy):
@@ -1414,6 +1465,33 @@ class OrgDocReader:
             self.headline_hierarchy[-1]["children"].append(headline)
         self.headline_hierarchy.append(headline)
 
+    def add_list_item_line(self, linenum: int, match: re.Match) -> None:
+        """Record the list item described by a LIST_ITEM_RE match.
+
+        The item goes to the last headline of the current hierarchy, or to
+        the reader's own list when no headline has been read yet.
+        """
+        li = ListItem(
+            linenum,
+            match,
+            match.group("indentation"),
+            match.group("bullet"),
+            match.group("counter"),
+            match.group("counter_sep"),
+            match.group("checkbox_indentation"),
+            match.group("checkbox_value"),
+            match.group("tag_indentation"),
+            match.group("tag"),
+            match.group("content"),
+        )
+
+        if len(self.headline_hierarchy) == 0:
+            # NOTE(review): no hunk of this patch reads self.list_items back;
+            # confirm that items found before the first headline get dumped.
+            self.list_items.append(li)
+        else:
+            self.headline_hierarchy[-1]["list_items"].append(li)
+
     def add_keyword_line(self, linenum: int, match: re.Match) -> int:
         options = match.group("options")
         kw = Keyword(
@@ -1490,10 +1568,12 @@ class OrgDocReader:
         for lnum, line in reader:
             linenum = lnum + 1
             try:
-                if m := RAW_LINE_RE.match(line):
-                    self.add_raw_line(linenum, line)
-                elif m := HEADLINE_RE.match(line):
+                if m := HEADLINE_RE.match(line):
                     self.add_headline(linenum, m)
+                elif m := LIST_ITEM_RE.match(line):
+                    self.add_list_item_line(linenum, m)
+                elif m := RAW_LINE_RE.match(line):
+                    self.add_raw_line(linenum, line)
                 # Org-babel
                 elif m := BEGIN_SRC_RE.match(line):
                     self.add_begin_src_line(linenum, m)
diff --git a/tests/06-lists.org b/tests/06-lists.org
new file mode 100644
index 0000000..97ef57c
--- /dev/null
+++ b/tests/06-lists.org
@@ -0,0 +1,39 @@
+#+TITLE: 06-Lists
+#+DESCRIPTION: Simple org file to test lists
+#+TODO: TODO(t) PAUSED(p) | DONE(d)
+
+
+* Simple lists
+  :PROPERTIES:
+  :ID: 06-lists-simple
+  :CREATED: [2020-01-01 Wed 01:01]
+  :END:
+
+  - This is a simple list.
+  - This list has multiple elements, with _markup_.
+
+Also represented as
+
+  + This is a simple list.
+  + This list has multiple elements, with _markup_.
+
+Also represented as
+
+  * This is a simple list.
+  * This list has multiple elements, with _markup_.
+
+
+* Numbered lists
+  :PROPERTIES:
+  :ID: 06-lists-numbered
+  :CREATED: [2020-01-01 Wed 01:01]
+  :END:
+
+
+  1. First element
+  2. Second element
+
+Also represented as
+
+  1) First element
+  2) Second element
diff --git a/tests/test_org.py b/tests/test_org.py
index 0f8c90e..7730874 100644
--- a/tests/test_org.py
+++ b/tests/test_org.py
@@ -442,3 +442,20 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(
             hl.deadline.time, Timestamp(True, 2020, 12, 17, None, None, None)
         )
+
+    def test_mimic_write_file_06(self):
+        with open(os.path.join(DIR, "06-lists.org")) as f:
+            orig = f.read()
+            doc = loads(orig)
+
+        self.assertEqual(dumps(doc), orig)
+
+    def test_structure_file_06(self):
+        with open(os.path.join(DIR, "06-lists.org")) as f:
+            orig = f.read()
+            doc = loads(orig)
+
+        hl = doc.getTopHeadlines()[0]
+        # ...
+        lists = hl.getLists()
+        self.assertEqual(len(lists), 3)