Add base dumping/serialization mechanism.
This commit is contained in:
parent
d29058cb5e
commit
d23ee1adba
@ -1 +1 @@
|
|||||||
from .org_dom import OrgDom, load, loads
|
from .org_dom import *
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
import collections
|
import collections
|
||||||
from typing import List
|
from typing import List, Tuple
|
||||||
|
|
||||||
BASE_ENVIRONMENT = {
|
BASE_ENVIRONMENT = {
|
||||||
'org-footnote-section': 'Footnotes',
|
'org-footnote-section': 'Footnotes',
|
||||||
@ -62,10 +63,17 @@ Headline = collections.namedtuple('Headline', ('start_line', 'depth',
|
|||||||
'children',
|
'children',
|
||||||
))
|
))
|
||||||
|
|
||||||
Property = collections.namedtuple('Property', ('name', 'value', 'options'))
|
RawLine = collections.namedtuple('RawLine', ('linenum', 'line'))
|
||||||
TimeRange = collections.namedtuple('TimeRange', ('start_time', 'end_time'))
|
Keyword = collections.namedtuple('Keyword', ('linenum', 'match', 'key', 'value', 'options'))
|
||||||
Timestamp = collections.namedtuple('Timestamp', ('year', 'month', 'day', 'dow', 'hour', 'minute'))
|
Property = collections.namedtuple('Property', ('linenum', 'match', 'key', 'value', 'options'))
|
||||||
|
|
||||||
|
# @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
|
||||||
|
# @TODO Consider recurrence annotations
|
||||||
|
TimeRange = collections.namedtuple('TimeRange', ('start_time', 'end_time'))
|
||||||
|
Timestamp = collections.namedtuple('Timestamp', ('active', 'year', 'month', 'day', 'dow', 'hour', 'minute'))
|
||||||
|
|
||||||
|
BEGIN_PROPERTIES = 'OPEN_PROPERTIES'
|
||||||
|
END_PROPERTIES = 'CLOSE_PROPERTIES'
|
||||||
|
|
||||||
def parse_org_time(value):
|
def parse_org_time(value):
|
||||||
if m := ACTIVE_TIME_STAMP_RE.match(value):
|
if m := ACTIVE_TIME_STAMP_RE.match(value):
|
||||||
@ -76,42 +84,152 @@ def parse_org_time(value):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
if m.group('end_hour'):
|
if m.group('end_hour'):
|
||||||
return TimeRange(Timestamp(int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute'))),
|
return TimeRange(Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute'))),
|
||||||
Timestamp(int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('end_hour')), int(m.group('end_minute'))))
|
Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('end_hour')), int(m.group('end_minute'))))
|
||||||
return Timestamp(int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute')))
|
return Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute')))
|
||||||
|
|
||||||
|
def timestamp_to_string(ts):
    """Render a timestamp record back into its bracketed text form.

    The date is written as ``YYYY-MM-DD``; the day-of-week text is
    appended when ``ts.dow`` is truthy and ``HH:MM`` is appended when
    ``ts.hour`` is not ``None``.  The result is wrapped in ``<...>`` when
    ``ts.active`` is truthy and in ``[...]`` otherwise.
    """
    pieces = [
        '{year}-{month:02d}-{day:02d}'.format(
            year=ts.year,
            month=ts.month,
            day=ts.day,
        )
    ]
    if ts.dow:
        pieces.append(ts.dow)
    if ts.hour is not None:
        pieces.append('{hour:02}:{minute:02d}'.format(hour=ts.hour, minute=ts.minute))

    text = ' '.join(pieces)
    opening, closing = ('<', '>') if ts.active else ('[', ']')
    return opening + text + closing
|
||||||
|
|
||||||
class OrgDom:
|
class OrgDom:
|
||||||
def __init__(self, headlines, keywords):
|
def __init__(self, headlines, keywords, contents):
|
||||||
self.headlines: List[Headline] = headlines
|
self.headlines: List[Headline] = headlines
|
||||||
self.keywords: List[Property] = keywords
|
self.keywords: List[Property] = keywords
|
||||||
|
self.contents: List[RawLine] = contents
|
||||||
|
|
||||||
def serialize(self):
|
def serialize(self):
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
## Querying
|
## Querying
|
||||||
def getProperties(self):
|
def getProperties(self):
|
||||||
return [
|
return self.keywords
|
||||||
Property(name=kw.group('key'),
|
|
||||||
value=kw.group('value'),
|
|
||||||
options=kw.group('options'),
|
|
||||||
)
|
|
||||||
for kw in self.keywords
|
|
||||||
]
|
|
||||||
|
|
||||||
def getTopHeadlines(self):
|
def getTopHeadlines(self):
|
||||||
return self.headlines
|
return self.headlines
|
||||||
|
|
||||||
|
# Writing
|
||||||
|
def dump_kw(self, kw):
    """Serialize a keyword back to its ``#+KEY[options]: value`` line.

    Indentation and spacing are taken from the regex match stored in
    ``kw.match``; key, options and value are taken from the fields of
    ``kw`` itself.

    Returns:
        A ``(linenum, text)`` tuple.
    """
    # A keyword without options may carry ``None`` (or ''); it must render
    # as nothing rather than as the literal text "None".
    options = kw.options if kw.options else ''

    return (kw.linenum,
            '{indentation}#+{key}{options}:{spacing}{value}'.format(
                indentation=kw.match.group('indentation'),
                key=kw.key,
                options=options,
                spacing=kw.match.group('spacing'),
                value=kw.value,
            ))
|
||||||
|
|
||||||
|
def dump_property(self, prop: Property):
    """Serialize a drawer property back to its ``:KEY: value`` line.

    Indentation, spacing and the optional ``plus`` group come from the
    regex match stored in ``prop.match``; the key and value come from
    ``prop`` itself.  A ``Timestamp`` value is rendered through
    ``timestamp_to_string``.

    Returns:
        A ``(linenum, text)`` tuple.
    """
    matched = prop.match
    suffix = matched.group('plus') or ''

    # NOTE(review): any value that is not a Timestamp is formatted as-is;
    # confirm that is intended for other parsed value types.
    if isinstance(prop.value, Timestamp):
        rendered = timestamp_to_string(prop.value)
    else:
        rendered = prop.value

    text = '{indentation}:{key}{plus}:{spacing}{value}'.format(
        indentation=matched.group('indentation'),
        key=prop.key,
        plus=suffix,
        spacing=matched.group('spacing'),
        value=rendered,
    )
    return (prop.linenum, text)
|
||||||
|
|
||||||
|
def dump_contents(self, raw: RawLine):
    """Return the ``(linenum, line)`` pair of a raw line, text unchanged."""
    linenum, text = raw.linenum, raw.line
    return (linenum, text)
|
||||||
|
|
||||||
|
def dump_structural(self, structural: Tuple):
    """Return the first two items of ``structural`` as a ``(linenum, text)`` tuple."""
    linenum, text = structural[0], structural[1]
    return (linenum, text)
|
||||||
|
|
||||||
|
def dump_headline(self, headline):
    """Yield the serialized lines of ``headline`` and then of its children.

    The first yielded line is rebuilt from the ``stars``, ``spacing`` and
    ``line`` groups of the match stored under ``headline['orig']``.  The
    headline's keywords, contents, properties and structural lines are
    then dumped to ``(linenum, text)`` pairs, merged, and emitted in
    line-number order.

    When a run of property lines is not preceded by a structural line a
    ``:PROPERTIES:`` line is inserted, and when it is not followed by one
    an ``:END:`` line is inserted (also when the run ends the headline's
    own lines).  Every insertion is reported with ``logging.warning``.
    """
    orig = headline['orig']
    yield orig.group('stars') + ' ' + orig.group('spacing') + orig.group('line')

    KW_T = 0
    CONTENT_T = 1
    PROPERTIES_T = 2
    STRUCTURAL_T = 3

    lines = []
    for keyword in headline['keywords']:
        lines.append((KW_T, self.dump_kw(keyword)))

    for content in headline['contents']:
        lines.append((CONTENT_T, self.dump_contents(content)))

    for prop in headline['properties']:
        lines.append((PROPERTIES_T, self.dump_property(prop)))

    for struct in headline['structural']:
        lines.append((STRUCTURAL_T, self.dump_structural(struct)))

    # Each entry is (type, (linenum, text)); order by the original line number.
    lines.sort(key=lambda entry: entry[1][0])

    drawer_types = (STRUCTURAL_T, PROPERTIES_T)
    last_type = None
    last_linenum = None
    last_content = None
    for ltype, dumped in lines:
        linenum, content = dumped[0], dumped[1]

        if ltype == PROPERTIES_T and last_type not in drawer_types:
            # No structural opening: indent it like the property line.
            opening = ' ' * content.index(':') + ':PROPERTIES:'
            logging.warning("Added structural:%s: %s", linenum, opening.strip())
            yield opening
        elif ltype not in drawer_types and last_type == PROPERTIES_T:
            # No structural closing: indent it like the last property line.
            closing = ' ' * last_content.index(':') + ':END:'
            logging.warning("Added structural:%s: %s", linenum, closing.strip())
            yield closing

        last_type, last_linenum, last_content = ltype, linenum, content
        yield content

    if last_type == PROPERTIES_T:
        # The property run reached the end of this headline's lines
        # without a structural closing; close it here as well.
        closing = ' ' * last_content.index(':') + ':END:'
        logging.warning("Added structural:%s: %s", last_linenum, closing.strip())
        yield closing

    for child in headline['children']:
        yield from self.dump_headline(child)
|
||||||
|
|
||||||
|
def dump(self):
    """Yield every line of the document in output order.

    The document-level keywords and raw content lines are dumped to
    ``(linenum, text)`` pairs and emitted sorted by line number; after
    them come the lines of each top-level headline, produced by
    ``dump_headline``.
    """
    numbered = [self.dump_kw(kw) for kw in self.keywords]
    numbered += [self.dump_contents(line) for line in self.contents]
    numbered.sort(key=lambda pair: pair[0])

    for pair in numbered:
        yield pair[1]

    for headline in self.headlines:
        yield from self.dump_headline(headline)
|
||||||
|
|
||||||
class OrgDomReader:
|
class OrgDomReader:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.headlines: List[Headline] = []
|
self.headlines: List[Headline] = []
|
||||||
self.keywords: List[Property] = []
|
self.keywords: List[Property] = []
|
||||||
self.headline_hierarchy: List[OrgDom] = []
|
self.headline_hierarchy: List[OrgDom] = []
|
||||||
|
self.contents: List[RawLine] = []
|
||||||
|
|
||||||
def finalize(self):
|
def finalize(self):
|
||||||
return OrgDom(self.headlines, self.keywords)
|
return OrgDom(self.headlines, self.keywords, self.contents)
|
||||||
|
|
||||||
## Construction
|
## Construction
|
||||||
def add_headline(self, linenum: int, match: re.Match) -> int:
|
def add_headline(self, linenum: int, match: re.Match) -> int:
|
||||||
@ -127,6 +245,7 @@ class OrgDomReader:
|
|||||||
'children': [],
|
'children': [],
|
||||||
'keywords': [],
|
'keywords': [],
|
||||||
'properties': [],
|
'properties': [],
|
||||||
|
'structural': [],
|
||||||
}
|
}
|
||||||
|
|
||||||
while (depth - 1) > len(self.headline_hierarchy):
|
while (depth - 1) > len(self.headline_hierarchy):
|
||||||
@ -143,20 +262,27 @@ class OrgDomReader:
|
|||||||
|
|
||||||
|
|
||||||
def add_keyword_line(self, linenum: int, match: re.Match) -> int:
|
def add_keyword_line(self, linenum: int, match: re.Match) -> int:
|
||||||
|
options = match.group('options')
|
||||||
|
kw = Keyword(linenum, match, match.group('key'), match.group('value'), options if options is not None else '')
|
||||||
if len(self.headline_hierarchy) == 0:
|
if len(self.headline_hierarchy) == 0:
|
||||||
self.keywords.append(match)
|
self.keywords.append(kw)
|
||||||
else:
|
else:
|
||||||
self.headline_hierarchy[-1]['keywords'].append('match')
|
self.headline_hierarchy[-1]['keywords'].append(kw)
|
||||||
|
|
||||||
def add_raw_line(self, linenum: int, line: str) -> int:
|
def add_raw_line(self, linenum: int, line: str) -> int:
|
||||||
print('>>', line)
|
raw = RawLine(linenum, line)
|
||||||
pass
|
if len(self.headline_hierarchy) == 0:
|
||||||
|
self.contents.append(raw)
|
||||||
|
else:
|
||||||
|
self.headline_hierarchy[-1]['contents'].append(raw)
|
||||||
|
|
||||||
def add_property_drawer_line(self, linenum: int, match: re.Match) -> int:
|
def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
|
||||||
self.current_drawer = self.headline_hierarchy[-1]['properties']
|
self.current_drawer = self.headline_hierarchy[-1]['properties']
|
||||||
|
self.headline_hierarchy[-1]['structural'].append((linenum, line))
|
||||||
|
|
||||||
def add_drawer_end_line(self, linenum: int, match: re.Match) -> int:
|
def add_drawer_end_line(self, linenum: int, line: str, match: re.Match) -> int:
|
||||||
self.current_drawer = None
|
self.current_drawer = None
|
||||||
|
self.headline_hierarchy[-1]['structural'].append((linenum, line))
|
||||||
|
|
||||||
def add_node_properties_line(self, linenum: int, match: re.Match) -> int:
|
def add_node_properties_line(self, linenum: int, match: re.Match) -> int:
|
||||||
key = match.group('key')
|
key = match.group('key')
|
||||||
@ -172,7 +298,7 @@ class OrgDomReader:
|
|||||||
elif as_time := parse_org_time(value):
|
elif as_time := parse_org_time(value):
|
||||||
value = as_time
|
value = as_time
|
||||||
|
|
||||||
self.current_drawer.append(Property(key, value, None))
|
self.current_drawer.append(Property(linenum, match, key, value, None))
|
||||||
|
|
||||||
def read(self, s, environment):
|
def read(self, s, environment):
|
||||||
lines = s.split('\n')
|
lines = s.split('\n')
|
||||||
@ -180,32 +306,35 @@ class OrgDomReader:
|
|||||||
|
|
||||||
for linenum, line in reader:
|
for linenum, line in reader:
|
||||||
if m := RAW_LINE_RE.match(line):
|
if m := RAW_LINE_RE.match(line):
|
||||||
# TODO: Parse line
|
|
||||||
self.add_raw_line(linenum, line)
|
self.add_raw_line(linenum, line)
|
||||||
elif m := HEADLINE_RE.match(line):
|
elif m := HEADLINE_RE.match(line):
|
||||||
# TODO: Parse headline
|
|
||||||
self.add_headline(linenum, m)
|
self.add_headline(linenum, m)
|
||||||
elif m := KEYWORDS_RE.match(line):
|
elif m := KEYWORDS_RE.match(line):
|
||||||
# TODO: Parse line
|
|
||||||
self.add_keyword_line(linenum, m)
|
self.add_keyword_line(linenum, m)
|
||||||
elif m := PROPERTY_DRAWER_RE.match(line):
|
elif m := PROPERTY_DRAWER_RE.match(line):
|
||||||
# TODO: Parse line
|
self.add_property_drawer_line(linenum, line, m)
|
||||||
self.add_property_drawer_line(linenum, m)
|
|
||||||
elif m := DRAWER_END_RE.match(line):
|
elif m := DRAWER_END_RE.match(line):
|
||||||
# TODO: Parse line
|
self.add_drawer_end_line(linenum, line, m)
|
||||||
self.add_drawer_end_line(linenum, m)
|
|
||||||
elif m := NODE_PROPERTIES_RE.match(line):
|
elif m := NODE_PROPERTIES_RE.match(line):
|
||||||
# TODO: Parse line
|
|
||||||
self.add_node_properties_line(linenum, m)
|
self.add_node_properties_line(linenum, m)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError('{}: ‘{}’'.format(linenum, line))
|
raise NotImplementedError('{}: ‘{}’'.format(linenum, line))
|
||||||
|
|
||||||
|
|
||||||
def loads(s, environment=BASE_ENVIRONMENT):
|
def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=False):
|
||||||
doc = OrgDomReader()
|
doc = OrgDomReader()
|
||||||
doc.read(s, environment)
|
doc.read(s, environment)
|
||||||
return doc.finalize()
|
dom = doc.finalize()
|
||||||
|
if extra_cautious: # Check that all options can be properly re-serialized
|
||||||
|
if dumps(dom) != s:
|
||||||
|
raise NotImplementedError("Error re-serializing, file uses something not implemented")
|
||||||
|
return dom
|
||||||
|
|
||||||
|
|
||||||
def load(f, environment=BASE_ENVIRONMENT):
|
def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
|
||||||
return loads(f.read(), environment)
|
return loads(f.read(), environment, extra_cautious)
|
||||||
|
|
||||||
|
|
||||||
|
def dumps(doc):
    """Serialize ``doc`` to one string: the lines from ``doc.dump()`` joined by newlines."""
    return '\n'.join(doc.dump())
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import unittest
|
import unittest
|
||||||
from datetime import datetime as DT
|
from datetime import datetime as DT
|
||||||
|
|
||||||
from org_dom import load, loads
|
from org_dom import dumps, load, loads
|
||||||
from utils.dom_assertions import HL, Dom
|
from utils.dom_assertions import HL, Dom
|
||||||
|
|
||||||
DIR = os.path.dirname(os.path.abspath(__file__))
|
DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
@ -37,3 +36,11 @@ class TestSerde(unittest.TestCase):
|
|||||||
])))
|
])))
|
||||||
|
|
||||||
ex.assert_matches(self, doc)
|
ex.assert_matches(self, doc)
|
||||||
|
|
||||||
|
def test_mimic_write_file_01(self):
    """A goal of this library is to be able to update a file without changing parts not directly modified."""
    path = os.path.join(DIR, '01-simple.org')
    with open(path) as handle:
        original_text = handle.read()

    # Loading and dumping back without edits must reproduce the input exactly.
    self.assertEqual(dumps(loads(original_text)), original_text)
|
||||||
|
@ -23,7 +23,7 @@ class Dom:
|
|||||||
test_case.assertEqual(len(doc_props), len(self.props))
|
test_case.assertEqual(len(doc_props), len(self.props))
|
||||||
|
|
||||||
for i, prop in enumerate(self.props):
|
for i, prop in enumerate(self.props):
|
||||||
test_case.assertEqual(doc_props[i].name, prop[0])
|
test_case.assertEqual(doc_props[i].key, prop[0])
|
||||||
test_case.assertEqual(doc_props[i].value, prop[1])
|
test_case.assertEqual(doc_props[i].value, prop[1])
|
||||||
|
|
||||||
# @TODO: Check properties
|
# @TODO: Check properties
|
||||||
@ -58,7 +58,7 @@ class HL:
|
|||||||
test_case.assertEqual(len(doc_props), len(self.props))
|
test_case.assertEqual(len(doc_props), len(self.props))
|
||||||
|
|
||||||
for i, prop in enumerate(self.props):
|
for i, prop in enumerate(self.props):
|
||||||
test_case.assertEqual(doc_props[i].name, prop[0])
|
test_case.assertEqual(doc_props[i].key, prop[0])
|
||||||
if isinstance(prop[1], datetime):
|
if isinstance(prop[1], datetime):
|
||||||
test_case.assertEqual(
|
test_case.assertEqual(
|
||||||
timestamp_to_datetime(doc_props[i].value), prop[1])
|
timestamp_to_datetime(doc_props[i].value), prop[1])
|
||||||
|
Loading…
Reference in New Issue
Block a user