org-rw/org_dom/org_dom.py

212 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
import collections
from typing import List
# Default parsing environment handed to `loads` / `load`.
#
# The keys reuse Emacs Org variable names. The trailing "SELECT_TAGS:" /
# "EXCLUDE_TAGS:" repeat earlier entries; they are kept as they were so the
# tuple stays a superset of what callers could previously rely on
# (presumably it mirrors the upstream Org list -- TODO confirm).
BASE_ENVIRONMENT = {
    'org-footnote-section': 'Footnotes',
    'org-options-keywords': (
        "ARCHIVE:",
        "AUTHOR:",
        "BIND:",
        "CATEGORY:",
        "COLUMNS:",
        "CREATOR:",
        "DATE:",
        "DESCRIPTION:",
        "DRAWERS:",
        "EMAIL:",
        "EXCLUDE_TAGS:",
        "FILETAGS:",
        "INCLUDE:",
        "INDEX:",
        "KEYWORDS:",
        "LANGUAGE:",
        "MACRO:",
        "OPTIONS:",
        "PROPERTY:",
        "PRIORITIES:",
        "SELECT_TAGS:",
        "SEQ_TODO:",
        "SETUPFILE:",
        "STARTUP:",
        # The comma after "TAGS:" was missing, which made Python concatenate
        # it with the next literal into the single bogus entry "TAGS:TITLE:".
        "TAGS:",
        "TITLE:",
        "TODO:",
        "TYP_TODO:",
        "SELECT_TAGS:",
        "EXCLUDE_TAGS:",
    ),
}
# Headline: one or more leading stars, a single space, optional extra
# whitespace (`spacing`), then the remainder of the line (`line`).
HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
# Keyword line: optional indentation, "#+KEY", an optional "[options]" part,
# a colon, optional whitespace, then the value.
KEYWORDS_RE = re.compile(r'^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$')
# Opening line of a property drawer (":PROPERTIES:"), surrounding whitespace allowed.
PROPERTY_DRAWER_RE = re.compile(r'^(?P<indentation>\s*):PROPERTIES:(?P<end_indentation>\s*)$')
# Closing line of a drawer (":END:"), surrounding whitespace allowed.
DRAWER_END_RE = re.compile(r'^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$')
# ":KEY: value" or ":KEY+: value"; the optional "+" is captured as `plus`.
# Note this also matches ":PROPERTIES:" / ":END:", so it must be tried after
# the two drawer patterns above.
NODE_PROPERTIES_RE = re.compile(r'^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$')
# Line that is empty/whitespace-only, or whose first non-blank character is
# none of "#", ":" or "*".
RAW_LINE_RE = re.compile(r'^\s*([^\s#:*]|$)')
# Shared body of a timestamp: "YYYY-MM-DD DOW", optionally followed by
# " H:MM" and, after that, an optional "--H:MM" end time. Not compiled on its
# own; it is embedded in the two bracketed patterns below.
BASE_TIME_STAMP_RE = r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[^ ]+)( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?'
# Timestamp body wrapped in "<...>".
ACTIVE_TIME_STAMP_RE = re.compile(r'<{}>'.format(BASE_TIME_STAMP_RE))
# Timestamp body wrapped in "[...]".
INACTIVE_TIME_STAMP_RE = re.compile(r'\[{}\]'.format(BASE_TIME_STAMP_RE))
# Disabled draft of a start/end pattern pair for ranges spanning two dates:
# BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
# r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
# Record describing one headline: where it starts, its depth, and the text
# and position of each of its parts, plus its content and child headlines.
Headline = collections.namedtuple(
    typename='Headline',
    field_names=[
        'start_line',
        'depth',
        'keyword_start',
        'keyword',
        'priority_start',
        'priority',
        'title_start',
        'title',
        'tags_start',
        'tags',
        'content',
        'children',
    ],
)

# A named value with optional options (used for keywords and drawer properties).
Property = collections.namedtuple(
    typename='Property',
    field_names=['name', 'value', 'options'],
)

# A pair of points in time delimiting a range.
TimeRange = collections.namedtuple(
    typename='TimeRange',
    field_names=['start_time', 'end_time'],
)

# A single date with day-of-week text and a time of day.
Timestamp = collections.namedtuple(
    typename='Timestamp',
    field_names=['year', 'month', 'day', 'dow', 'hour', 'minute'],
)
def parse_org_time(value):
    """Parse an Org timestamp found at the start of `value`.

    Both active (``<...>``) and inactive (``[...]``) timestamps are accepted;
    the two kinds are not distinguished in the result.

    Args:
        value: Text expected to begin with a timestamp.

    Returns:
        ``None`` if `value` does not start with a timestamp.
        A `TimeRange` of two `Timestamp`s sharing the same date when the
        timestamp carries an ``H:MM--H:MM`` time range.
        Otherwise a single `Timestamp`; its `hour` and `minute` are ``None``
        when the timestamp has no time of day (e.g. ``<2020-01-01 Wed>``).
    """
    m = ACTIVE_TIME_STAMP_RE.match(value) or INACTIVE_TIME_STAMP_RE.match(value)
    if m is None:
        return None

    def optional_int(group_name):
        # The time-of-day groups are optional in the pattern; a date-only
        # timestamp leaves them as None, which int() cannot convert.
        text = m.group(group_name)
        return None if text is None else int(text)

    year = int(m.group('year'))
    month = int(m.group('month'))
    day = int(m.group('day'))
    dow = m.group('dow')

    start = Timestamp(year, month, day, dow,
                      optional_int('start_hour'), optional_int('start_minute'))
    if not m.group('end_hour'):
        return start

    # An end time can only match after a start time, on the same date.
    end = Timestamp(year, month, day, dow,
                    int(m.group('end_hour')), int(m.group('end_minute')))
    return TimeRange(start, end)
class OrgDom:
    """Parsed Org document: its top-level headlines and document keywords."""

    def __init__(self, headlines, keywords):
        """Store the headline list and the keyword-line matches as given."""
        self.keywords: List[Property] = keywords
        self.headlines: List[Headline] = headlines

    def serialize(self):
        """Not implemented yet; always raises `NotImplementedError`."""
        raise NotImplementedError()

    ## Querying
    def getProperties(self):
        """Return one `Property` per stored keyword match.

        Each entry of `self.keywords` is read through its ``key``, ``value``
        and ``options`` groups.
        """
        properties = []
        for keyword_match in self.keywords:
            properties.append(
                Property(
                    name=keyword_match.group('key'),
                    value=keyword_match.group('value'),
                    options=keyword_match.group('options'),
                )
            )
        return properties

    def getTopHeadlines(self):
        """Return the list of top-level headlines (the stored list itself)."""
        top_level = self.headlines
        return top_level
class OrgDomReader:
    """Line-oriented builder that accumulates the pieces of an Org document.

    `read` classifies each input line and forwards it to the matching
    ``add_*`` method; `finalize` wraps the collected data in an `OrgDom`.
    """

    def __init__(self):
        # Top-level headline records (dicts built by `add_headline`).
        self.headlines: List[dict] = []
        # Matches of keyword lines seen before the first headline.
        self.keywords: List[re.Match] = []
        # Ancestor path of the most recent headline: index i holds the
        # current headline at depth i, or None for a level that was skipped.
        self.headline_hierarchy: List[dict] = []
        # 'properties' list of the headline whose drawer is open, else None.
        self.current_drawer = None

    def finalize(self):
        """Return an `OrgDom` built from the collected headlines and keywords."""
        return OrgDom(self.headlines, self.keywords)

    ## Construction
    def add_headline(self, linenum: int, match: re.Match) -> None:
        """Register a headline and make it the current one.

        Args:
            linenum: Index of the line within the input.
            match: Match exposing the ``stars`` and ``line`` groups.
        """
        # One star is depth 0, two stars depth 1, and so on.
        depth = len(match.group('stars')) - 1

        headline = {
            'linenum': linenum,
            'orig': match,
            'title': match.group('line'),
            'contents': [],
            'children': [],
            'keywords': [],
            'properties': [],
        }

        hierarchy = self.headline_hierarchy
        # Forget every previous headline at this depth or deeper.
        del hierarchy[depth:]
        # Pad skipped levels so that list index always equals depth; without
        # this, siblings that skip a level get nested under each other.
        while len(hierarchy) < depth:
            hierarchy.append(None)

        # Attach to the nearest real ancestor. When there is none (depth 0,
        # or a document that starts with a nested headline) the headline
        # becomes a top-level one.
        parent = next((h for h in reversed(hierarchy) if h is not None), None)
        if parent is None:
            self.headlines.append(headline)
        else:
            parent['children'].append(headline)
        hierarchy.append(headline)

    def add_keyword_line(self, linenum: int, match: re.Match) -> None:
        """Store a keyword-line match on the document or the current headline."""
        if not self.headline_hierarchy:
            self.keywords.append(match)
        else:
            # Store the match object itself (previously the literal string
            # 'match' was appended by mistake).
            self.headline_hierarchy[-1]['keywords'].append(match)

    def add_raw_line(self, linenum: int, line: str) -> None:
        """Accept a plain text line.

        Placeholder: raw lines are recognised but not stored anywhere yet.
        The former debug ``print`` was dropped so that parsing does not write
        to stdout.
        """

    def add_property_drawer_line(self, linenum: int, match: re.Match) -> None:
        """Open the property drawer of the current headline.

        Raises:
            NotImplementedError: If no headline has been read yet, since
                there is nowhere to store document-level properties.
        """
        if not self.headline_hierarchy:
            raise NotImplementedError('{}: {}'.format(linenum, match.string))
        self.current_drawer = self.headline_hierarchy[-1]['properties']

    def add_drawer_end_line(self, linenum: int, match: re.Match) -> None:
        """Close whichever drawer is currently open."""
        self.current_drawer = None

    @staticmethod
    def _split_time_range(value):
        """Split ``<start>--<end> => DURATION`` text into its two timestamps.

        Returns a ``(start_text, end_text)`` tuple, or ``None`` when `value`
        does not contain exactly one ``>--<`` or ``]--[`` separator.
        """
        # @TODO properly consider "=> DURATION" section (ignored for now)
        range_text = value.split('=', 1)[0].strip()
        for separator in ('>--<', ']--['):
            if range_text.count(separator) == 1:
                start_text, end_text = range_text.split(separator)
                # Give each half back the bracket the separator consumed.
                return start_text + separator[0], separator[-1] + end_text
        return None

    def add_node_properties_line(self, linenum: int, match: re.Match) -> None:
        """Append a ``:KEY: value`` entry to the open drawer.

        Values that parse as timestamps are stored as `Timestamp` /
        `TimeRange`; anything else is kept as the stripped string.

        Raises:
            NotImplementedError: If no property drawer is open.
        """
        if self.current_drawer is None:
            raise NotImplementedError('{}: {}'.format(linenum, match.string))

        key = match.group('key')
        value = match.group('value').strip()

        range_texts = self._split_time_range(value)
        if range_texts is not None:
            # Time ranges with two different dates
            start_time = parse_org_time(range_texts[0])
            end_time = parse_org_time(range_texts[1])
            if (start_time is not None) and (end_time is not None):
                value = TimeRange(start_time, end_time)
        elif as_time := parse_org_time(value):
            value = as_time

        self.current_drawer.append(Property(key, value, None))

    def read(self, s, environment):
        """Parse the Org text `s` line by line into this reader.

        Args:
            s: Whole document text; it is split on ``'\\n'``.
            environment: Accepted for interface compatibility; not used yet.

        Raises:
            NotImplementedError: For a line no pattern recognises.
        """
        # Order matters: NODE_PROPERTIES_RE also matches the drawer
        # delimiters, so the more specific patterns come first.
        line_handlers = (
            (HEADLINE_RE, self.add_headline),
            (KEYWORDS_RE, self.add_keyword_line),
            (PROPERTY_DRAWER_RE, self.add_property_drawer_line),
            (DRAWER_END_RE, self.add_drawer_end_line),
            (NODE_PROPERTIES_RE, self.add_node_properties_line),
        )

        for linenum, line in enumerate(s.split('\n')):
            if RAW_LINE_RE.match(line):
                self.add_raw_line(linenum, line)
                continue
            for pattern, handler in line_handlers:
                if m := pattern.match(line):
                    handler(linenum, m)
                    break
            else:
                raise NotImplementedError('{}: {}'.format(linenum, line))
def loads(s, environment=BASE_ENVIRONMENT):
    """Parse the Org text `s` and return the resulting `OrgDom`.

    `environment` is forwarded unchanged to `OrgDomReader.read`.
    """
    reader = OrgDomReader()
    reader.read(s, environment)
    dom = reader.finalize()
    return dom
def load(f, environment=BASE_ENVIRONMENT):
    """Read all text from the file-like object `f` and parse it with `loads`."""
    text = f.read()
    return loads(text, environment)