def get_tables(self):
    """Group this headline's table rows into tables.

    Walks ``self.table_rows`` in order. A row whose ``linenum`` is exactly
    one more than the previous row's joins the current table; any other
    row (including the very first one) opens a new table.

    Returns:
        A list of tables, each table being a list of rows.
    """
    grouped = []
    previous_linenum = None

    for current in self.table_rows:
        if previous_linenum != current.linenum - 1:
            # Gap in the line numbers (or first row): start a new table.
            grouped.append([current])
        else:
            grouped[-1].append(current)
        previous_linenum = current.linenum

    return grouped
"suffix", + "last_cell_closed", + "cells", + ), +) # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ? # @TODO Consider recurrence annotations @@ -1484,6 +1512,13 @@ def dump_contents(raw): f"{raw.indentation}{bullet}{checkbox}{tag}{content}", ) + elif isinstance(raw, TableRow): + closed = '|' if raw.last_cell_closed else '' + return ( + raw.linenum, + f"{' ' * raw.indentation}|{'|'.join(raw.cells)}{closed}{raw.suffix}", + ) + return (raw.linenum, raw.get_raw()) @@ -1537,6 +1572,7 @@ def parse_headline(hl, doc, parent) -> Headline: structural=hl["structural"], delimiters=hl["delimiters"], list_items=hl["list_items"], + table_rows=hl["table_rows"], title_start=None, priority=None, priority_start=None, @@ -1718,6 +1754,9 @@ class OrgDoc: for li in headline.list_items: lines.append((CONTENT_T, dump_contents(li))) + for row in headline.table_rows: + lines.append((CONTENT_T, dump_contents(row))) + for prop in headline.properties: lines.append((PROPERTIES_T, dump_property(prop))) @@ -1796,6 +1835,7 @@ class OrgDocReader: self.contents: List[RawLine] = [] self.delimiters: List[DelimiterLine] = [] self.list_items: List[ListItem] = [] + self.table_rows: List[TableRow] = [] self.structural: List = [] self.properties: List = [] @@ -1828,6 +1868,7 @@ class OrgDocReader: "delimiters": [], "results": [], # TODO: Move to each specific code block? 
def add_table_line(self, linenum: int, line: str) -> None:
    """Parse one table line into a ``TableRow`` and store it.

    The line is split on ``|``. The text before the first pipe is the
    indentation, the text between pipes are the cells, and, when the row
    is terminated by a closing pipe, whatever whitespace follows that pipe
    is kept as ``suffix`` so the line can be written back unchanged.

    Args:
        linenum: Line number of ``line`` in the document.
        line: Raw text of the line; the caller only passes lines whose
            first non-blank character is ``|``.

    The row is appended to the innermost open headline's ``table_rows``,
    or to the reader's own ``table_rows`` when no headline is open yet.
    """
    chunks = line.split('|')

    # NOTE(review): only the *length* of the indentation is stored and
    # dump_contents re-emits it as that many spaces, so tab-indented
    # tables will not round-trip byte for byte.
    indentation = len(chunks[0])

    # The last cell is "closed" only if a second pipe terminates it, i.e.
    # the split produced more than two chunks and the trailing one is
    # blank. Without the length check a lone "|" (chunks == ['', '']) was
    # treated as closed with no cells and was dumped back as "||".
    if len(chunks) > 2 and chunks[-1].strip() == '':
        suffix = chunks[-1]
        cells = chunks[1:-1]
        last_cell_closed = True
    else:
        suffix = ''
        cells = chunks[1:]
        last_cell_closed = False

    row = TableRow(
        linenum,
        indentation,
        suffix,
        last_cell_closed,
        cells,
    )

    if len(self.headline_hierarchy) == 0:
        self.table_rows.append(row)
    else:
        self.headline_hierarchy[-1]["table_rows"].append(row)
def test_tables_file_10(self):
    """Check that the table in 10-tables.org is parsed into rows and cells.

    The fixture holds a single four-row table under its first headline:
    a header, a separator, a row whose last cell has no closing pipe and
    a regular closed row.
    """
    with open(os.path.join(DIR, "10-tables.org")) as f:
        doc = load(f)

    hl = doc.getTopHeadlines()[0]

    tables = hl.get_tables()
    # The text lines around the table must not be picked up as rows.
    self.assertEqual(len(tables), 1)
    first_table = tables[0]
    self.assertEqual(len(first_table), 4)

    header = first_table[0]
    self.assertEqual(len(header.cells), 3)
    self.assertEqual(header.cells[0].strip(), 'Header1')
    self.assertEqual(header.cells[1].strip(), 'Header2')
    self.assertEqual(header.cells[2].strip(), 'Header3')
    self.assertTrue(header.last_cell_closed)

    # Third row is the one the fixture leaves without a closing pipe.
    unclosed = first_table[2]
    self.assertFalse(unclosed.last_cell_closed)
    self.assertEqual(len(unclosed.cells), 3)
    self.assertEqual(
        unclosed.cells[2].strip(), 'Content1-3 (last cell unclosed)'
    )

    self.assertTrue(first_table[3].last_cell_closed)