feat: Add base support for tables.

This commit is contained in:
Sergio Martínez Portela 2022-09-27 23:36:32 +02:00
parent 7b7c186b83
commit c2968283f3
3 changed files with 113 additions and 3 deletions

View File

@ -62,7 +62,7 @@ DRAWER_END_RE = re.compile(r"^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$
NODE_PROPERTIES_RE = re.compile( NODE_PROPERTIES_RE = re.compile(
r"^(?P<indentation>\s*):(?P<key>[^ ()+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.+)$" r"^(?P<indentation>\s*):(?P<key>[^ ()+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.+)$"
) )
RAW_LINE_RE = re.compile(r"^\s*([^\s#:*]|$)") RAW_LINE_RE = re.compile(r"^\s*([^\s#:*|]|$)")
BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})( ?(?P<dow>[^ ]+))?( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(-+(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?(?P<repetition> (?P<rep_mark>(\+|\+\+|\.\+|-|--))(?P<rep_value>\d+)(?P<rep_unit>[hdwmy]))?" BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})( ?(?P<dow>[^ ]+))?( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(-+(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?(?P<repetition> (?P<rep_mark>(\+|\+\+|\.\+|-|--))(?P<rep_value>\d+)(?P<rep_unit>[hdwmy]))?"
CLEAN_TIME_STAMP_RE = r"\d{4}-\d{2}-\d{2}( ?([^ ]+))?( (\d{1,2}):(\d{1,2})(-+(\d{1,2}):(\d{1,2}))?)?( (\+|\+\+|\.\+|-|--)\d+[hdwmy])?" CLEAN_TIME_STAMP_RE = r"\d{4}-\d{2}-\d{2}( ?([^ ]+))?( (\d{1,2}):(\d{1,2})(-+(\d{1,2}):(\d{1,2}))?)?( (\+|\+\+|\.\+|-|--)\d+[hdwmy])?"
@ -253,6 +253,7 @@ class Headline:
structural, structural,
delimiters, delimiters,
list_items, list_items,
table_rows,
parent, parent,
is_todo, is_todo,
is_done, is_done,
@ -277,6 +278,7 @@ class Headline:
self.structural = structural self.structural = structural
self.delimiters = delimiters self.delimiters = delimiters
self.list_items = list_items self.list_items = list_items
self.table_rows = table_rows
self.parent = parent self.parent = parent
self.is_todo = is_todo self.is_todo = is_todo
self.is_done = is_done self.is_done = is_done
@ -485,7 +487,7 @@ class Headline:
return tree return tree
def getLists(self): def get_lists(self):
lists = [] lists = []
last_line = None last_line = None
@ -498,6 +500,22 @@ class Headline:
last_line = li.linenum last_line = li.linenum
return lists return lists
def getLists(self):
    """Return the same result as get_lists().

    Kept under the camelCase name so existing callers keep working; all
    the work is delegated to get_lists().
    """
    lists = self.get_lists()
    return lists
def get_tables(self):
    """Group this headline's table rows into tables.

    Rows whose line numbers are consecutive belong to the same table; any
    gap in the line numbers starts a new one.  Rows are visited in the
    order they are stored in self.table_rows.

    Returns a list of tables, each one being a list of rows.
    """
    grouped = []
    previous_linenum = None

    for entry in self.table_rows:
        if previous_linenum != entry.linenum - 1:
            # First row overall, or a gap since the previous row.
            grouped.append([entry])
        else:
            grouped[-1].append(entry)
        previous_linenum = entry.linenum

    return grouped
def get_planning_line(self): def get_planning_line(self):
if self.scheduled is None and self.closed is None and self.deadline is None: if self.scheduled is None and self.closed is None and self.deadline is None:
return None return None
@ -614,7 +632,7 @@ class Headline:
for content in self.contents: for content in self.contents:
yield from get_links_from_content(content) yield from get_links_from_content(content)
for lst in self.getLists(): for lst in self.get_lists():
for item in lst: for item in lst:
yield from get_links_from_content(item.content) yield from get_links_from_content(item.content)
@ -779,6 +797,16 @@ ListItem = collections.namedtuple(
"content", "content",
), ),
) )
# A single parsed table line, as produced by OrgDocReader.add_table_line.
TableRow = collections.namedtuple(
    "TableRow",
    [
        "linenum",  # Line number of the row in the source document
        "indentation",  # Number of characters before the first '|'
        "suffix",  # Whitespace found after the closing '|' ('' if none)
        "last_cell_closed",  # Whether the last cell ends with a '|'
        "cells",  # Raw text of each cell, without the '|' separators
    ],
)
# @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ? # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
# @TODO Consider recurrence annotations # @TODO Consider recurrence annotations
@ -1484,6 +1512,13 @@ def dump_contents(raw):
f"{raw.indentation}{bullet}{checkbox}{tag}{content}", f"{raw.indentation}{bullet}{checkbox}{tag}{content}",
) )
elif isinstance(raw, TableRow):
closed = '|' if raw.last_cell_closed else ''
return (
raw.linenum,
f"{' ' * raw.indentation}|{'|'.join(raw.cells)}{closed}{raw.suffix}",
)
return (raw.linenum, raw.get_raw()) return (raw.linenum, raw.get_raw())
@ -1537,6 +1572,7 @@ def parse_headline(hl, doc, parent) -> Headline:
structural=hl["structural"], structural=hl["structural"],
delimiters=hl["delimiters"], delimiters=hl["delimiters"],
list_items=hl["list_items"], list_items=hl["list_items"],
table_rows=hl["table_rows"],
title_start=None, title_start=None,
priority=None, priority=None,
priority_start=None, priority_start=None,
@ -1718,6 +1754,9 @@ class OrgDoc:
for li in headline.list_items: for li in headline.list_items:
lines.append((CONTENT_T, dump_contents(li))) lines.append((CONTENT_T, dump_contents(li)))
for row in headline.table_rows:
lines.append((CONTENT_T, dump_contents(row)))
for prop in headline.properties: for prop in headline.properties:
lines.append((PROPERTIES_T, dump_property(prop))) lines.append((PROPERTIES_T, dump_property(prop)))
@ -1796,6 +1835,7 @@ class OrgDocReader:
self.contents: List[RawLine] = [] self.contents: List[RawLine] = []
self.delimiters: List[DelimiterLine] = [] self.delimiters: List[DelimiterLine] = []
self.list_items: List[ListItem] = [] self.list_items: List[ListItem] = []
self.table_rows: List[TableRow] = []
self.structural: List = [] self.structural: List = []
self.properties: List = [] self.properties: List = []
@ -1828,6 +1868,7 @@ class OrgDocReader:
"delimiters": [], "delimiters": [],
"results": [], # TODO: Move to each specific code block? "results": [], # TODO: Move to each specific code block?
"list_items": [], "list_items": [],
"table_rows": [],
} }
while (depth - 1) > len(self.headline_hierarchy): while (depth - 1) > len(self.headline_hierarchy):
@ -1874,6 +1915,31 @@ class OrgDocReader:
else: else:
self.headline_hierarchy[-1]["list_items"].append(li) self.headline_hierarchy[-1]["list_items"].append(li)
def add_table_line(self, linenum: int, line: str) -> None:
    """Parse one table line and record it as a TableRow.

    The line is split on '|'.  The text before the first bar determines
    the indentation width and the text between bars becomes the cells.
    When the text after the last bar is empty or whitespace-only, the
    last cell is considered closed and that trailing text is kept as the
    row's suffix; otherwise the trailing text is the (unclosed) last cell.

    The row is appended to the last entry of self.headline_hierarchy, or
    to self.table_rows when no headline has been opened yet.

    Args:
        linenum: Line number of `line` in the document.
        line: Raw text of the line; expected to contain at least one '|'.
    """
    chunks = line.split("|")

    # NOTE(review): only the width is stored, and the dump code rebuilds
    # the indentation with spaces, so tab indentation is not preserved.
    indentation = len(chunks[0])

    # A closing bar requires at least two bars (three chunks).  With a
    # single bar (e.g. "|" or "| ") that bar is the opening one; treating
    # it as the closing one too would make the row dump back as "||".
    if len(chunks) > 2 and chunks[-1].strip() == "":
        suffix = chunks[-1]
        cells = chunks[1:-1]
        last_cell_closed = True
    else:
        suffix = ""
        cells = chunks[1:]
        last_cell_closed = False

    row = TableRow(
        linenum,
        indentation,
        suffix,
        last_cell_closed,
        cells,
    )

    if not self.headline_hierarchy:
        self.table_rows.append(row)
    else:
        self.headline_hierarchy[-1]["table_rows"].append(row)
def add_keyword_line(self, linenum: int, match: re.Match) -> int: def add_keyword_line(self, linenum: int, match: re.Match) -> int:
options = match.group("options") options = match.group("options")
kw = Keyword( kw = Keyword(
@ -1995,6 +2061,8 @@ class OrgDocReader:
in_drawer = True in_drawer = True
elif m := NODE_PROPERTIES_RE.match(line): elif m := NODE_PROPERTIES_RE.match(line):
self.add_node_properties_line(linenum, m) self.add_node_properties_line(linenum, m)
elif line.strip().startswith('|'):
self.add_table_line(linenum, line)
# Not captured # Not captured
else: else:
self.add_raw_line(linenum, line) self.add_raw_line(linenum, line)

18
tests/10-tables.org Normal file
View File

@ -0,0 +1,18 @@
#+TITLE: 10-Tables
#+DESCRIPTION: Table test
#+TODO: TODO(t) PAUSED(p) | DONE(d)
* Simple table
:PROPERTIES:
:ID: 10-table-test-id
:CREATED: [2020-01-01 Wed 01:01]
:END:
Content just before the table.
| Header1 | Header2 | Header3 |
|------------+------------+------------|
| Content1-1 | Content1-2 | Content1-3 (last cell unclosed)
| Content2-1 | Content2-2 | Content2-3 |
Content after the table.

View File

@ -634,3 +634,27 @@ class TestSerde(unittest.TestCase):
MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE), MarkerToken(closing=True, tok_type=MarkerType.UNDERLINED_MODE),
' markup', ' markup',
]) ])
def test_mimic_write_file_10(self):
    """Loading and dumping 10-tables.org must give back the original text."""
    path = os.path.join(DIR, "10-tables.org")
    with open(path) as f:
        original_text = f.read()

    parsed = loads(original_text)
    self.assertEqual(dumps(parsed), original_text)
def test_tables_file_10(self):
    """Check that the table in 10-tables.org is split into rows and cells."""
    with open(os.path.join(DIR, "10-tables.org")) as f:
        doc = load(f)

    hl = doc.getTopHeadlines()[0]

    tables = hl.get_tables()
    first_table = tables[0]

    # Every '|' line of the fixture table is expected as one row.
    self.assertEqual(len(first_table), 4)

    header_cells = first_table[0].cells
    self.assertEqual(len(header_cells), 3)
    # Cells keep their surrounding padding, so strip before comparing.
    self.assertEqual(header_cells[0].strip(), 'Header1')
    self.assertEqual(header_cells[1].strip(), 'Header2')
    self.assertEqual(header_cells[2].strip(), 'Header3')