org-rw/org_rw/org_rw.py

1538 lines
47 KiB
Python
Raw Normal View History

2021-01-17 11:40:15 +00:00
from __future__ import annotations
import collections
import difflib
import logging
import os
2020-06-21 19:27:40 +00:00
import re
import sys
from datetime import date, datetime, timedelta
from enum import Enum
from typing import Generator, List, Tuple, Union
2020-06-21 19:27:40 +00:00
BASE_ENVIRONMENT = {
"org-footnote-section": "Footnotes",
"org-options-keywords": (
2020-06-21 19:27:40 +00:00
"ARCHIVE:",
"AUTHOR:",
"BIND:",
"CATEGORY:",
"COLUMNS:",
"CREATOR:",
"DATE:",
"DESCRIPTION:",
"DRAWERS:",
"EMAIL:",
"EXCLUDE_TAGS:",
"FILETAGS:",
"INCLUDE:",
"INDEX:",
"KEYWORDS:",
"LANGUAGE:",
"MACRO:",
"OPTIONS:",
"PROPERTY:",
"PRIORITIES:",
"SELECT_TAGS:",
"SEQ_TODO:",
"SETUPFILE:",
"STARTUP:",
"TAGS:" "TITLE:",
2020-06-21 19:27:40 +00:00
"TODO:",
"TYP_TODO:",
"SELECT_TAGS:",
"EXCLUDE_TAGS:",
2020-06-21 19:27:40 +00:00
),
}
DEFAULT_TODO_KEYWORDS = ["TODO"]
DEFAULT_DONE_KEYWORDS = ["DONE"]
HEADLINE_TAGS_RE = re.compile(r"((:[a-zA-Z0-9_@#%]+)+:)\s*$")
HEADLINE_RE = re.compile(r"^(?P<stars>\*+)(?P<spacing>\s*)(?P<line>.*?)$")
KEYWORDS_RE = re.compile(
r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
)
DRAWER_START_RE = re.compile(r"^(?P<indentation>\s*):([^:]+):(?P<end_indentation>\s*)$")
DRAWER_END_RE = re.compile(r"^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$", re.I)
NODE_PROPERTIES_RE = re.compile(
r"^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.+)$"
)
RAW_LINE_RE = re.compile(r"^\s*([^\s#:*]|$)")
2021-01-17 11:40:15 +00:00
BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})( ?(?P<dow>[^ ]+))?( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(-+(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?(?P<repetition> (?P<rep_mark>(\+|\+\+|\.\+|-|--))(?P<rep_value>\d+)(?P<rep_unit>[hdwmy]))?"
CLEAN_TIME_STAMP_RE = r"\d{4}-\d{2}-\d{2}( ?([^ ]+))?( (\d{1,2}):(\d{1,2})(-+(\d{1,2}):(\d{1,2}))?)?( (\+|\+\+|\.\+|-|--)\d+[hdwmy])?"
2020-06-21 19:27:40 +00:00
ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))
PLANNING_RE = re.compile(
r"(?P<indentation>\s*)"
+ r"(SCHEDULED:\s*(?P<scheduled>[<\[]"
+ CLEAN_TIME_STAMP_RE
2021-01-17 12:04:27 +00:00
+ r"[>\]](--[<\[]"
+ CLEAN_TIME_STAMP_RE
+ r"[>\]])?)\s*"
+ r"|CLOSED:\s*(?P<closed>[<\[]"
+ CLEAN_TIME_STAMP_RE
2021-01-17 12:04:27 +00:00
+ r"[>\]](--[<\[]"
+ CLEAN_TIME_STAMP_RE
+ r"[>\]])?)\s*"
+ r"|DEADLINE:\s*(?P<deadline>[<\[]"
+ CLEAN_TIME_STAMP_RE
2021-01-17 12:04:27 +00:00
+ r"[>\]](--[<\[]"
+ CLEAN_TIME_STAMP_RE
+ r"[>\]])?)\s*"
r")+\s*"
)
2020-06-21 19:27:40 +00:00
# Org-Babel
2020-12-05 23:26:44 +00:00
BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(?P<content>.*)$", re.I)
END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$", re.I)
RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I)
CodeSnippet = collections.namedtuple("CodeSnippet", ("name", "content", "result"))
2020-06-21 19:27:40 +00:00
# BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
# r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
2020-11-26 22:44:56 +00:00
2020-11-02 22:51:11 +00:00
def get_tokens(value):
if isinstance(value, Text):
return value.contents
if isinstance(value, RawLine):
return [value.line]
raise Exception("Unknown how to get tokens from: {}".format(value))
2020-11-26 22:44:56 +00:00
class RangeInRaw:
def __init__(self, content, start_token, end_token):
self._content = content
self._start_id = id(start_token)
self._end_id = id(end_token)
def update_range(self, new_contents):
# Find start token
for start_idx, tok in enumerate(self._content.contents):
if id(tok) == self._start_id:
break
else:
raise Exception("Start token not found")
# Find end token
for offset, tok in enumerate(self._content.contents[start_idx:]):
if id(tok) == self._end_id:
break
else:
raise Exception("End token not found")
# Remove old contents
for i in range(1, offset):
self._content.contents.pop(start_idx + 1)
# Add new ones
for i, element in enumerate(new_contents):
self._content.contents.insert(start_idx + i + 1, element)
2020-11-02 22:51:11 +00:00
def get_links_from_content(content):
in_link = False
in_description = False
link_value = []
link_description = []
for i, tok in enumerate(get_tokens(content)):
2020-11-02 22:51:11 +00:00
if isinstance(tok, LinkToken):
if tok.tok_type == LinkTokenType.OPEN_LINK:
in_link = True
open_link_token = tok
2020-11-02 22:51:11 +00:00
elif tok.tok_type == LinkTokenType.OPEN_DESCRIPTION:
in_description = True
elif tok.tok_type == LinkTokenType.CLOSE:
rng = RangeInRaw(content, open_link_token, tok)
yield Link(
"".join(link_value),
"".join(link_description) if in_description else None,
rng,
)
2020-11-02 22:51:11 +00:00
in_link = False
in_description = False
link_value = []
link_description = []
elif isinstance(tok, str) and in_link:
if in_description:
link_description.append(tok)
else:
link_value.append(tok)
2020-11-26 22:44:56 +00:00
2020-11-02 22:51:11 +00:00
class Headline:
2020-11-26 22:44:56 +00:00
def __init__(
self,
start_line,
depth,
orig,
properties,
keywords,
priority_start,
priority,
title_start,
title,
state,
2020-11-26 22:44:56 +00:00
tags_start,
tags,
contents,
children,
structural,
2020-12-05 23:26:44 +00:00
delimiters,
parent,
is_todo,
is_done,
spacing,
2020-11-26 22:44:56 +00:00
):
2020-11-02 22:51:11 +00:00
self.start_line = start_line
self.depth = depth
self.orig = orig
self.properties = properties
self.keywords = keywords
self.priority_start = priority_start
self.priority = priority
self.title_start = title_start
self.title = title
self.state = state
2020-11-02 22:51:11 +00:00
self.tags_start = tags_start
self.shallow_tags = tags
2020-11-02 22:51:11 +00:00
self.contents = contents
self.children = children
self.structural = structural
2020-12-05 23:26:44 +00:00
self.delimiters = delimiters
self.parent = parent
self.is_todo = is_todo
self.is_done = is_done
self.scheduled = None
self.deadline = None
self.closed = None
self.spacing = spacing
# Read planning line
planning_line = self.get_element_in_line(start_line + 1)
# Ignore if not found or is a structural line
if planning_line is None or isinstance(planning_line, tuple):
return
if m := PLANNING_RE.match(planning_line.get_raw()):
2021-01-11 23:58:28 +00:00
self._planning_indendation = m.group("indentation")
self._planning_order = []
keywords = ["SCHEDULED", "CLOSED", "DEADLINE"]
plan = planning_line.get_raw().split("\n")[0]
indexes = [(kw, plan.find(kw)) for kw in keywords]
self._planning_order = [
kw
for (kw, idx) in sorted(
filter(lambda v: v[1] >= 0, indexes), key=lambda v: v[1]
)
]
if scheduled := m.group("scheduled"):
2021-01-17 12:04:27 +00:00
self.scheduled = parse_time(scheduled)
if closed := m.group("closed"):
2021-01-17 12:04:27 +00:00
self.closed = parse_time(closed)
if deadline := m.group("deadline"):
2021-01-17 12:04:27 +00:00
self.deadline = parse_time(deadline)
2021-01-11 23:58:28 +00:00
# Remove from contents
self._remove_element_in_line(start_line + 1)
def get_planning_line(self):
if self.scheduled is None and self.closed is None and self.deadline is None:
return None
contents = [self._planning_indendation]
for el in self._planning_order:
if el == "SCHEDULED" and self.scheduled is not None:
contents.append("SCHEDULED: {} ".format(self.scheduled.to_raw()))
elif el == "CLOSED" and self.closed is not None:
contents.append("CLOSED: {} ".format(self.closed.to_raw()))
elif el == "DEADLINE" and self.deadline is not None:
contents.append("DEADLINE: {} ".format(self.deadline.to_raw()))
# Consider elements added (not present on planning order)
if ("SCHEDULED" not in self._planning_order) and (self.scheduled is not None):
contents.append("SCHEDULED: {} ".format(self.scheduled.to_raw()))
if ("CLOSED" not in self._planning_order) and (self.closed is not None):
contents.append("CLOSED: {} ".format(self.closed.to_raw()))
if ("DEADLINE" not in self._planning_order) and (self.deadline is not None):
contents.append("DEADLINE: {} ".format(self.deadline.to_raw()))
return "".join(contents).rstrip()
2021-01-04 23:01:48 +00:00
@property
def clock(self):
times = []
for chunk in self.contents:
for line in chunk.get_raw().split("\n"):
content = line.strip()
if not content.startswith("CLOCK:"):
continue
time_seg = content[len("CLOCK:") :].strip()
if "--" in time_seg:
# TODO: Consider duration
start, end = time_seg.split("=")[0].split("--")
as_time_range = parse_org_time_range(start, end)
parsed = as_time_range
else:
2021-01-17 11:40:15 +00:00
parsed = OrgTime.parse(time_seg)
2021-01-04 23:01:48 +00:00
times.append(parsed)
return times
@property
def tags(self):
if isinstance(self.parent, OrgDoc):
return list(self.shallow_tags)
else:
return list(self.shallow_tags) + self.parent.tags
2020-11-02 22:51:11 +00:00
2020-12-28 13:34:18 +00:00
def get_property(self, name: str, default=None):
for prop in self.properties:
if prop.key == name:
return prop.value
return default
2020-11-02 22:51:11 +00:00
def get_links(self):
for content in self.contents:
yield from get_links_from_content(content)
2020-12-05 23:26:44 +00:00
def get_lines_between(self, start, end):
for line in self.contents:
if start <= line.linenum < end:
yield "".join(line.contents)
def get_contents(self, format):
if format == "raw":
lines = []
for line in self.contents:
lines.append(dump_contents(line))
yield from map(lambda x: x[1], sorted(lines, key=lambda x: x[0]))
else:
raise NotImplementedError()
2020-12-05 23:26:44 +00:00
def get_element_in_line(self, linenum):
for line in self.contents:
if linenum == line.linenum:
return line
for (s_lnum, struc) in self.structural:
if linenum == s_lnum:
return ("structural", struc)
2021-01-11 23:58:28 +00:00
def _remove_element_in_line(self, linenum):
found = None
for i, line in enumerate(self.contents):
if linenum == line.linenum:
found = i
break
assert found is not None
el = self.contents[found]
assert isinstance(el, Text)
raw = el.get_raw()
if "\n" not in raw:
# Remove the element found
self.contents.pop(found)
else:
# Remove the first line
self.contents[found] = parse_content_block(
[RawLine(self.contents[found].linenum + 1, raw.split("\n", 1)[1])]
)
2020-12-05 23:26:44 +00:00
def get_structural_end_after(self, linenum):
for (s_lnum, struc) in self.structural:
if s_lnum > linenum and struc.strip().upper() == ":END:":
return (s_lnum, struc)
def get_code_snippets(self):
inside_code = False
sections = []
for delimiter in self.delimiters:
if delimiter.delimiter_type == DelimiterLineType.BEGIN_SRC:
line_start = delimiter.linenum
inside_code = True
elif delimiter.delimiter_type == DelimiterLineType.END_SRC:
inside_code = False
start, end = line_start, delimiter.linenum
lines = self.get_lines_between(start + 1, end)
contents = "\n".join(lines)
if contents.endswith("\n"):
# This is not ideal, but to avoid having to do this maybe
# the content parsing must be re-thinked
contents = contents[:-1]
sections.append(
{
"line_first": start + 1,
"line_last": end - 1,
"content": contents,
}
)
line_start = None
for kword in self.keywords:
if kword.key.upper() == "RESULTS":
for snippet in sections:
if kword.linenum > snippet["line_last"]:
result_first = self.get_element_in_line(kword.linenum + 1)
if isinstance(result_first, Text):
result = "\n".join(result_first.contents)
snippet["result"] = result
if result.strip().startswith(": "):
# Split lines and remove ':'
lines = result.split("\n")
s_result = []
for line in lines:
if ": " not in line:
break
s_result.append(line.lstrip(" ")[2:])
snippet["result"] = "\n".join(s_result)
elif (
isinstance(result_first, tuple)
and len(result_first) == 2
and result_first[0] == "structural"
and result_first[1].strip().upper() == ":RESULTS:"
):
(end_line, _) = self.get_structural_end_after(
kword.linenum + 1
)
contents = "\n".join(
self.get_lines_between(kword.linenum + 1, end_line)
)
indentation = result_first[1].index(":")
dedented = "\n".join(
[line[indentation:] for line in contents.split("\n")]
)
if dedented.endswith("\n"):
dedented = dedented[:-1]
snippet["result"] = dedented
break
results = []
for section in sections:
name = None
content = section["content"]
code_result = section.get("result", None)
results.append(CodeSnippet(name=name, content=content, result=code_result))
return results
2020-11-26 22:44:56 +00:00
RawLine = collections.namedtuple("RawLine", ("linenum", "line"))
Keyword = collections.namedtuple(
"Keyword", ("linenum", "match", "key", "value", "options")
)
Property = collections.namedtuple(
"Property", ("linenum", "match", "key", "value", "options")
)
# @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
# @TODO Consider recurrence annotations
2021-01-11 23:58:28 +00:00
class Timestamp:
def __init__(self, active, year, month, day, dow, hour, minute, repetition=None):
self.active = active
self._year = year
self._month = month
self._day = day
self.dow = dow
self.hour = hour
self.minute = minute
self.repetition = repetition
def to_datetime(self) -> Union[datetime, date]:
if self.hour is not None:
return datetime(self.year, self.month, self.day, self.hour, self.minute)
else:
return datetime(self.year, self.month, self.day, 0, 0)
2021-01-17 11:40:15 +00:00
2021-01-11 23:58:28 +00:00
def __eq__(self, other):
if not isinstance(other, Timestamp):
return False
return (
(self.active == other.active)
and (self.year == other.year)
and (self.month == other.month)
and (self.day == other.day)
and (self.dow == other.dow)
and (self.hour == other.hour)
and (self.minute == other.minute)
and (self.repetition == other.repetition)
)
def __repr__(self):
return timestamp_to_string(self)
# Properties whose modification changes the Day-Of-Week
@property
def year(self):
return self._year
@year.setter
def year(self, value):
self._year = value
self.dow = None
@property
def month(self):
return self._month
@month.setter
def month(self, value):
self._month = value
self.dow = None
@property
def day(self):
return self._day
@day.setter
def day(self, value):
self._day = value
self.dow = None
2020-12-05 23:26:44 +00:00
class DelimiterLineType(Enum):
BEGIN_SRC = 1
END_SRC = 2
DelimiterLine = collections.namedtuple(
"DelimiterLine", ("linenum", "line", "delimiter_type")
)
class MarkerType(Enum):
NO_MODE = 0b0
BOLD_MODE = 0b1
CODE_MODE = 0b10
ITALIC_MODE = 0b100
STRIKE_MODE = 0b1000
UNDERLINED_MODE = 0b10000
VERBATIM_MODE = 0b100000
2020-11-26 22:44:56 +00:00
MARKERS = {
"*": MarkerType.BOLD_MODE,
"~": MarkerType.CODE_MODE,
"/": MarkerType.ITALIC_MODE,
"+": MarkerType.STRIKE_MODE,
"_": MarkerType.UNDERLINED_MODE,
"=": MarkerType.VERBATIM_MODE,
}
ModeToMarker = {}
for tok, mode in MARKERS.items():
ModeToMarker[mode] = tok
MarkerToken = collections.namedtuple("MarkerToken", ("closing", "tok_type"))
2020-11-02 22:51:11 +00:00
LinkToken = collections.namedtuple("LinkToken", ("tok_type"))
2020-11-26 22:44:56 +00:00
2020-11-02 22:51:11 +00:00
class LinkTokenType(Enum):
OPEN_LINK = 3
OPEN_DESCRIPTION = 5
CLOSE = 4
2020-11-26 22:44:56 +00:00
BEGIN_PROPERTIES = "OPEN_PROPERTIES"
END_PROPERTIES = "CLOSE_PROPERTIES"
2020-11-26 22:44:56 +00:00
def token_from_type(tok_type):
return ModeToMarker[tok_type]
2020-06-21 19:27:40 +00:00
2021-01-04 23:01:48 +00:00
class TimeRange:
2021-01-17 11:40:15 +00:00
def __init__(self, start_time: OrgTime, end_time: OrgTime):
2021-01-04 23:01:48 +00:00
self.start_time = start_time
self.end_time = end_time
2021-01-17 12:04:27 +00:00
def to_raw(self) -> str:
return timerange_to_string(self)
2021-01-04 23:01:48 +00:00
@property
def duration(self) -> timedelta:
delta = self.end - self.start
return delta
@property
def start(self) -> datetime:
st = self.start_time
return datetime(st.year, st.month, st.day, st.hour or 0, st.minute or 0)
@property
def end(self) -> datetime:
et = self.end_time
return datetime(et.year, et.month, et.day, et.hour or 0, et.minute or 0)
2021-01-17 12:04:27 +00:00
def parse_time(value: str) -> Union[None, TimeRange, OrgTime]:
if (value.count(">--<") == 1) or (value.count("]--[") == 1):
# Time ranges with two different dates
# @TODO properly consider "=> DURATION" section
start, end = value.split("=")[0].split("--")
as_time_range = parse_org_time_range(start, end)
if (as_time_range.start_time is not None) and (
as_time_range.end_time is not None
):
return as_time_range
else:
raise Exception("Unknown time range format: {}".format(value))
elif as_time := OrgTime.parse(value):
return as_time
else:
return None
2021-01-17 11:40:15 +00:00
def parse_org_time_range(start, end) -> TimeRange:
return TimeRange(OrgTime.parse(start), OrgTime.parse(end))
2021-01-17 11:40:15 +00:00
class OrgTime:
def __init__(self, ts: Timestamp, end_time: Union[Timestamp, None] = None):
assert ts is not None
self.time = ts
self.end_time = end_time
2020-06-21 19:27:40 +00:00
2021-01-17 11:40:15 +00:00
def to_raw(self):
return timestamp_to_string(self.time, self.end_time)
def __repr__(self):
return f"OrgTime({self.to_raw()})"
@classmethod
def parse(self, value: str) -> OrgTime:
if m := ACTIVE_TIME_STAMP_RE.match(value):
active = True
elif m := INACTIVE_TIME_STAMP_RE.match(value):
active = False
else:
return None
if m.group("end_hour"):
return OrgTime(
Timestamp(
active,
int(m.group("year")),
int(m.group("month")),
int(m.group("day")),
m.group("dow"),
int(m.group("start_hour")),
int(m.group("start_minute")),
),
Timestamp(
active,
int(m.group("year")),
int(m.group("month")),
int(m.group("day")),
m.group("dow"),
int(m.group("end_hour")),
int(m.group("end_minute")),
),
)
return OrgTime(
Timestamp(
active,
int(m.group("year")),
int(m.group("month")),
int(m.group("day")),
m.group("dow"),
2021-01-17 11:40:15 +00:00
int(m.group("start_hour")) if m.group("start_hour") else None,
int(m.group("start_minute")) if m.group("start_minute") else None,
m.group("repetition").strip() if m.group("repetition") else None,
)
)
2021-01-17 11:40:15 +00:00
def time_from_str(s: str) -> OrgTime:
return OrgTime.parse(s)
def timerange_to_string(tr: TimeRange):
2021-01-17 11:40:15 +00:00
return tr.start_time.to_raw() + "--" + tr.end_time.to_raw()
2021-01-17 11:40:15 +00:00
def timestamp_to_string(ts: Timestamp, end_time: Union[Timestamp, None] = None) -> str:
date = "{year}-{month:02d}-{day:02d}".format(
year=ts.year, month=ts.month, day=ts.day
)
if ts.dow:
date = date + " " + ts.dow
if ts.hour is not None:
base = "{date} {hour:02}:{minute:02d}".format(
date=date, hour=ts.hour, minute=ts.minute
)
else:
base = date
2020-06-21 19:27:40 +00:00
2021-01-17 11:40:15 +00:00
if end_time is not None:
assert end_time.hour is not None
assert end_time.minute is not None
base = "{base}-{hour:02}:{minute:02d}".format(
base=base, hour=end_time.hour, minute=end_time.minute
)
2021-01-11 23:58:28 +00:00
if ts.repetition:
base = base + " " + ts.repetition
if ts.active:
return "<{}>".format(base)
else:
return "[{}]".format(base)
2020-06-21 19:27:40 +00:00
2020-10-25 19:23:08 +00:00
def get_raw(doc):
if isinstance(doc, str):
return doc
else:
return doc.get_raw()
2020-10-09 22:39:32 +00:00
2020-10-09 22:39:32 +00:00
class Line:
def __init__(self, linenum, contents):
self.linenum = linenum
self.contents = contents
def get_raw(self):
rawchunks = []
for chunk in self.contents:
if isinstance(chunk, str):
rawchunks.append(chunk)
else:
rawchunks.append(chunk.get_raw())
return "".join(rawchunks) + "\n"
2020-10-09 22:39:32 +00:00
2020-11-02 22:51:11 +00:00
class Link:
def __init__(self, value: str, description: str, origin: RangeInRaw):
self._value = value
self._description = description
self._origin = origin
2020-11-02 22:51:11 +00:00
def get_raw(self):
if self.description:
2020-11-26 22:44:56 +00:00
return "[[{}][{}]]".format(self.value, self.description)
2020-11-02 22:51:11 +00:00
else:
2020-11-26 22:44:56 +00:00
return "[[{}]]".format(self.value)
2020-11-02 22:51:11 +00:00
def _update_content(self):
new_contents = []
new_contents.append(self._value)
if self._description:
new_contents.append(LinkToken(LinkTokenType.OPEN_DESCRIPTION))
new_contents.append(self._description)
self._origin.update_range(new_contents)
@property
def value(self):
return self._value
@value.setter
def value(self, new_value):
self._value = new_value
self._update_content()
@property
def description(self):
return self._description
@description.setter
def description(self, new_description):
self._description = new_description
self._update_content()
2020-11-02 22:51:11 +00:00
2020-10-09 22:39:32 +00:00
class Text:
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
2020-10-25 19:23:08 +00:00
self.linenum = line
2020-10-09 22:39:32 +00:00
2020-11-02 22:51:11 +00:00
def __repr__(self):
return "{{Text line: {}; content: {} }}".format(self.linenum, self.contents)
2020-10-09 22:39:32 +00:00
def get_raw(self):
contents = []
for chunk in self.contents:
if isinstance(chunk, str):
contents.append(chunk)
2020-11-02 22:51:11 +00:00
elif isinstance(chunk, LinkToken):
if chunk.tok_type == LinkTokenType.OPEN_LINK:
2020-11-26 22:44:56 +00:00
contents.append("[[")
2020-11-02 22:51:11 +00:00
elif chunk.tok_type == LinkTokenType.OPEN_DESCRIPTION:
2020-11-26 22:44:56 +00:00
contents.append("][")
2020-11-02 22:51:11 +00:00
else:
assert chunk.tok_type == LinkTokenType.CLOSE
2020-11-26 22:44:56 +00:00
contents.append("]]")
else:
assert isinstance(chunk, MarkerToken)
contents.append(token_from_type(chunk.tok_type))
2020-11-26 22:44:56 +00:00
return "".join(contents)
2020-10-09 22:39:32 +00:00
class Bold:
Marker = "*"
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
def get_raw(self):
raw = "".join(map(get_raw, self.contents))
2020-10-25 19:23:08 +00:00
return f"{self.Marker}{raw}{self.Marker}"
2020-10-09 22:39:32 +00:00
2020-10-09 22:39:32 +00:00
class Code:
Marker = "~"
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
def get_raw(self):
raw = "".join(map(get_raw, self.contents))
2020-10-25 19:23:08 +00:00
return f"{self.Marker}{raw}{self.Marker}"
2020-10-09 22:39:32 +00:00
2020-10-09 22:39:32 +00:00
class Italic:
Marker = "/"
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
def get_raw(self):
raw = "".join(map(get_raw, self.contents))
2020-10-25 19:23:08 +00:00
return f"{self.Marker}{raw}{self.Marker}"
2020-10-09 22:39:32 +00:00
2020-10-09 22:39:32 +00:00
class Strike:
Marker = "+"
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
def get_raw(self):
raw = "".join(map(get_raw, self.contents))
2020-10-25 19:23:08 +00:00
return f"{self.Marker}{raw}{self.Marker}"
2020-10-09 22:39:32 +00:00
2020-10-09 22:39:32 +00:00
class Underlined:
Marker = "_"
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
def get_raw(self):
raw = "".join(map(get_raw, self.contents))
2020-10-25 19:23:08 +00:00
return f"{self.Marker}{raw}{self.Marker}"
2020-10-09 22:39:32 +00:00
2020-10-09 22:39:32 +00:00
class Verbatim:
Marker = "="
2020-10-25 19:23:08 +00:00
def __init__(self, contents, line):
2020-10-09 22:39:32 +00:00
self.contents = contents
def get_raw(self):
raw = "".join(map(get_raw, self.contents))
2020-10-25 19:23:08 +00:00
return f"{self.Marker}{raw}{self.Marker}"
def is_pre(char: str) -> bool:
if isinstance(char, str):
return char in "\n\r\t -({'\""
2020-10-25 19:23:08 +00:00
else:
return True
2020-10-25 19:23:08 +00:00
def is_marker(char: str) -> bool:
if isinstance(char, str):
return char in "*=/+_~"
2020-10-25 19:23:08 +00:00
else:
return False
2020-10-25 19:23:08 +00:00
def is_border(char: str) -> bool:
if isinstance(char, str):
return char not in "\n\r\t "
2020-10-25 19:23:08 +00:00
else:
return False
2020-10-25 19:23:08 +00:00
def is_body(char: str) -> bool:
if isinstance(char, str):
return True
else:
return False
2020-10-25 19:23:08 +00:00
def is_post(char: str) -> bool:
if isinstance(char, str):
return char in "-.,;:!?')}[\""
2020-10-25 19:23:08 +00:00
else:
return False
2020-10-25 19:23:08 +00:00
TOKEN_TYPE_TEXT = 0
TOKEN_TYPE_OPEN_MARKER = 1
TOKEN_TYPE_CLOSE_MARKER = 2
2020-11-02 22:51:11 +00:00
TOKEN_TYPE_OPEN_LINK = 3
TOKEN_TYPE_CLOSE_LINK = 4
TOKEN_TYPE_OPEN_DESCRIPTION = 5
2020-10-25 19:23:08 +00:00
2020-10-25 19:23:08 +00:00
def tokenize_contents(contents: str):
tokens = []
last_char = None
text = []
closes = set()
2020-11-02 22:51:11 +00:00
in_link = False
in_link_description = False
last_link_start = 0
def cut_string():
nonlocal text
nonlocal tokens
2020-10-25 19:23:08 +00:00
2020-11-02 22:51:11 +00:00
if len(text) > 0:
tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
text = []
cursor = enumerate(contents)
for i, char in cursor:
2020-10-25 19:23:08 +00:00
has_changed = False
2020-11-02 22:51:11 +00:00
# Possible link opening
2020-11-26 22:44:56 +00:00
if char == "[":
if (
len(contents) > i + 3
2020-11-02 22:51:11 +00:00
# At least 3 characters more to open and close a link
2020-11-26 22:44:56 +00:00
and contents[i + 1] == "["
):
close = contents.find("]", i)
2020-11-02 22:51:11 +00:00
2020-11-26 22:44:56 +00:00
if close != -1 and contents[close + 1] == "]":
2020-11-02 22:51:11 +00:00
# Link with no description
cut_string()
in_link = True
tokens.append((TOKEN_TYPE_OPEN_LINK, None))
2020-11-26 22:44:56 +00:00
assert "[" == (next(cursor)[1])
2020-11-02 22:51:11 +00:00
last_link_start = i
continue
2020-11-26 22:44:56 +00:00
if close != -1 and contents[close + 1] == "[":
2020-11-02 22:51:11 +00:00
# Link with description?
2020-11-26 22:44:56 +00:00
close = contents.find("]", close + 1)
if close != -1 and contents[close + 1] == "]":
2020-11-02 22:51:11 +00:00
# No match here means this is not an Org link
cut_string()
in_link = True
tokens.append((TOKEN_TYPE_OPEN_LINK, None))
2020-11-26 22:44:56 +00:00
assert "[" == (next(cursor)[1])
2020-11-02 22:51:11 +00:00
last_link_start = i
continue
# Possible link close or open of description
2020-11-26 22:44:56 +00:00
if char == "]" and in_link:
if contents[i + 1] == "]":
2020-11-02 22:51:11 +00:00
cut_string()
tokens.append((TOKEN_TYPE_CLOSE_LINK, None))
2020-11-26 22:44:56 +00:00
assert "]" == (next(cursor)[1])
2020-11-02 22:51:11 +00:00
in_link = False
in_link_description = False
continue
2020-11-26 22:44:56 +00:00
if contents[i + 1] == "[" and not in_link_description:
2020-11-02 22:51:11 +00:00
cut_string()
tokens.append((TOKEN_TYPE_OPEN_DESCRIPTION, None))
2020-11-26 22:44:56 +00:00
assert "[" == (next(cursor)[1])
2020-11-02 22:51:11 +00:00
continue
2020-11-26 22:44:56 +00:00
raise Exception(
"Link cannot contain ']' not followed by '[' or ']'. Starting with {}".format(
contents[last_link_start : i + 10]
)
)
2020-11-02 22:51:11 +00:00
2020-11-26 22:44:56 +00:00
if in_link and not in_link_description:
2020-11-02 22:51:11 +00:00
# Link's pointer have no formatting
pass
elif (
(i not in closes)
and is_marker(char)
and is_pre(last_char)
and ((i + 1 < len(contents)) and is_border(contents[i + 1]))
):
2020-10-25 19:23:08 +00:00
is_valid_mark = False
# Check that is closed later
text_in_line = True
for j in range(i, len(contents) - 1):
if contents[j] == "\n":
2020-10-25 19:23:08 +00:00
if not text_in_line:
break
text_in_line = False
elif is_border(contents[j]) and contents[j + 1] == char:
is_valid_mark = True
closes.add(j + 1)
break
else:
text_in_line |= is_body(contents[j])
if is_valid_mark:
2020-11-02 22:51:11 +00:00
cut_string()
2020-10-25 19:23:08 +00:00
tokens.append((TOKEN_TYPE_OPEN_MARKER, char))
has_changed = True
elif i in closes:
2020-11-02 22:51:11 +00:00
cut_string()
2020-10-25 19:23:08 +00:00
tokens.append((TOKEN_TYPE_CLOSE_MARKER, char))
has_changed = True
if not has_changed:
text.append(char)
last_char = char
if len(text) > 0:
tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
2020-10-25 19:23:08 +00:00
return tokens
2020-10-09 22:39:32 +00:00
def parse_contents(raw_contents: List[RawLine]):
if len(raw_contents) == 0:
return []
blocks = []
current_block = []
for line in raw_contents:
if len(current_block) == 0:
# Seed the first block
current_line = line.linenum
current_block.append(line)
else:
if line.linenum == current_line + 1:
# Continue with the current block
current_line = line.linenum
current_block.append(line)
else:
# Split the blocks
blocks.append(current_block)
current_line = line.linenum
current_block = [line]
# Check that the current block is not left behind
if len(current_block) > 0:
blocks.append(current_block)
return [parse_content_block(block) for block in blocks]
def parse_content_block(raw_contents: List[RawLine]):
2020-10-09 22:39:32 +00:00
contents_buff = []
for line in raw_contents:
contents_buff.append(line.line)
contents = "\n".join(contents_buff)
2020-10-25 19:23:08 +00:00
tokens = tokenize_contents(contents)
current_line = raw_contents[0].linenum
2020-10-25 19:23:08 +00:00
contents = []
# Use tokens to tag chunks of text with it's container type
for (tok_type, tok_val) in tokens:
if tok_type == TOKEN_TYPE_TEXT:
contents.append(tok_val)
2020-10-25 19:23:08 +00:00
elif tok_type == TOKEN_TYPE_OPEN_MARKER:
contents.append(MarkerToken(False, MARKERS[tok_val]))
2020-10-25 19:23:08 +00:00
elif tok_type == TOKEN_TYPE_CLOSE_MARKER:
contents.append(MarkerToken(True, MARKERS[tok_val]))
2020-11-02 22:51:11 +00:00
elif tok_type == TOKEN_TYPE_OPEN_LINK:
contents.append(LinkToken(LinkTokenType.OPEN_LINK))
elif tok_type == TOKEN_TYPE_OPEN_DESCRIPTION:
contents.append(LinkToken(LinkTokenType.OPEN_DESCRIPTION))
elif tok_type == TOKEN_TYPE_CLOSE_LINK:
contents.append(LinkToken(LinkTokenType.CLOSE))
2020-10-25 19:23:08 +00:00
return Text(contents, current_line)
2020-10-09 22:39:32 +00:00
def dump_contents(raw):
if isinstance(raw, RawLine):
return (raw.linenum, raw.line)
return (raw.linenum, raw.get_raw())
def parse_headline(hl, doc, parent) -> Headline:
stars = hl["orig"].group("stars")
2020-06-27 17:20:34 +00:00
depth = len(stars)
spacing = hl["orig"].group("spacing")
2020-06-27 17:20:34 +00:00
# TODO: Parse line for priority, cookies and tags
line = hl["orig"].group("line")
hl_tags = HEADLINE_TAGS_RE.search(line)
if hl_tags is None:
tags = []
else:
tags = hl_tags.group(0)[1:-1].split(":")
line = HEADLINE_TAGS_RE.sub("", line)
hl_state = None
title = line
is_done = is_todo = False
for state in doc.todo_keywords or []:
if title.startswith(state + " "):
hl_state = state
title = title[len(state + " ") :]
is_todo = True
break
else:
for state in doc.done_keywords or []:
if title.startswith(state + " "):
hl_state = state
title = title[len(state + " ") :]
is_done = True
break
contents = parse_contents(hl["contents"])
headline = Headline(
start_line=hl["linenum"],
depth=depth,
orig=hl["orig"],
title=title,
state=hl_state,
contents=contents,
children=None,
keywords=hl["keywords"],
properties=hl["properties"],
structural=hl["structural"],
2020-12-05 23:26:44 +00:00
delimiters=hl["delimiters"],
title_start=None,
priority=None,
priority_start=None,
tags_start=None,
tags=tags,
parent=parent,
is_todo=is_todo,
is_done=is_done,
spacing=spacing,
2020-06-27 17:20:34 +00:00
)
headline.children = [
parse_headline(child, doc, headline) for child in hl["children"]
]
return headline
2020-06-27 17:20:34 +00:00
2020-12-20 11:39:47 +00:00
class OrgDoc:
def __init__(self, headlines, keywords, contents):
self.todo_keywords = DEFAULT_TODO_KEYWORDS
self.done_keywords = DEFAULT_DONE_KEYWORDS
for keyword in keywords:
if keyword.key == "TODO":
todo_kws, done_kws = re.sub(r"\(.\)", "", keyword.value).split("|", 1)
self.todo_keywords = re.sub(r"\s{2,}", " ", todo_kws.strip()).split()
self.done_keywords = re.sub(r"\s{2,}", " ", done_kws.strip()).split()
2020-06-21 19:27:40 +00:00
self.keywords: List[Property] = keywords
self.contents: List[RawLine] = contents
self._path = None
self.headlines: List[Headline] = list(
map(lambda hl: parse_headline(hl, self, self), headlines)
)
@property
def path(self):
return self._path
2020-06-21 19:27:40 +00:00
## Querying
2020-11-02 22:51:11 +00:00
def get_links(self):
for headline in self.headlines:
yield from headline.get_links()
for content in self.contents:
yield from get_links_from_content(content)
2020-06-21 19:27:40 +00:00
def getProperties(self):
return self.keywords
2020-06-21 19:27:40 +00:00
def getTopHeadlines(self):
return self.headlines
def getAllHeadlines(self) -> Generator[Headline]:
todo = self.headlines[::-1] # We go backwards, to pop/append and go depth-first
while len(todo) != 0:
hl = todo.pop()
todo.extend(hl.children[::-1])
yield hl
2020-12-05 23:26:44 +00:00
def get_code_snippets(self):
for headline in self.headlines:
yield from headline.get_code_snippets()
# Writing
def dump_kw(self, kw):
options = kw.match.group("options")
if not options:
options = ""
return (
kw.linenum,
"{indentation}#+{key}{options}:{spacing}{value}".format(
indentation=kw.match.group("indentation"),
key=kw.key,
options=kw.options,
spacing=kw.match.group("spacing"),
value=kw.value,
),
)
def dump_property(self, prop: Property):
plus = prop.match.group("plus")
if plus is None:
plus = ""
2021-01-17 11:40:15 +00:00
if isinstance(prop.value, TimeRange):
value = timerange_to_string(prop.value)
2021-01-17 11:40:15 +00:00
elif isinstance(prop.value, OrgTime):
value = prop.value.to_raw()
else:
value = prop.value
return (
prop.linenum,
"{indentation}:{key}{plus}:{spacing}{value}".format(
indentation=prop.match.group("indentation"),
key=prop.key,
plus=plus,
spacing=prop.match.group("spacing"),
value=value,
),
)
def dump_structural(self, structural: Tuple):
return (structural[0], structural[1])
2020-12-05 23:26:44 +00:00
def dump_delimiters(self, line: DelimiterLine):
return (line.linenum, line.line)
def dump_headline(self, headline):
tags = ""
if len(headline.shallow_tags) > 0:
tags = ":" + ":".join(headline.shallow_tags) + ":"
state = ""
if headline.state:
state = headline.state + " "
yield "*" * headline.depth + headline.spacing + state + headline.title + tags
2021-01-11 23:58:28 +00:00
planning = headline.get_planning_line()
if planning is not None:
yield planning
lines = []
KW_T = 0
CONTENT_T = 1
PROPERTIES_T = 2
STRUCTURAL_T = 3
2020-06-27 17:20:34 +00:00
for keyword in headline.keywords:
lines.append((KW_T, self.dump_kw(keyword)))
2020-06-27 17:20:34 +00:00
for content in headline.contents:
lines.append((CONTENT_T, dump_contents(content)))
2020-06-27 17:20:34 +00:00
for prop in headline.properties:
lines.append((PROPERTIES_T, self.dump_property(prop)))
2020-06-27 17:20:34 +00:00
for struct in headline.structural:
lines.append((STRUCTURAL_T, self.dump_structural(struct)))
2020-12-05 23:26:44 +00:00
for content in headline.delimiters:
lines.append((STRUCTURAL_T, self.dump_delimiters(content)))
lines = sorted(lines, key=lambda x: x[1][0])
structured_lines = []
last_type = None
for i, line in enumerate(lines):
ltype = line[0]
content = line[1][1]
if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
# No structural opening
structured_lines.append(" " * content.index(":") + ":PROPERTIES:\n")
logging.warning(
"Added structural: ".format(
line[1][0], structured_lines[-1].strip()
)
)
elif (
ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T
):
# No structural closing
last_line = lines[i - 1][1][1]
structured_lines.append(" " * last_line.index(":") + ":END:\n")
logging.warning(
"Added structural:{}: {}".format(
line[1][0], structured_lines[-1].strip()
)
)
content = content + "\n"
last_type = ltype
structured_lines.append(content)
if len(structured_lines) > 0:
content = "".join(structured_lines)
# Remove the last line jump, which will be accounted for by the "yield operation"
assert content.endswith("\n")
content = content[:-1]
yield content
2020-06-27 17:20:34 +00:00
for child in headline.children:
yield from self.dump_headline(child)
def dump(self):
lines = []
for kw in self.keywords:
lines.append(self.dump_kw(kw))
for line in self.contents:
lines.append(dump_contents(line))
yield from map(lambda x: x[1], sorted(lines, key=lambda x: x[0]))
for headline in self.headlines:
yield from self.dump_headline(headline)
2020-06-21 19:27:40 +00:00
2020-12-20 11:39:47 +00:00
class OrgDocReader:
2020-06-21 19:27:40 +00:00
def __init__(self):
self.headlines: List[Headline] = []
self.keywords: List[Property] = []
2020-12-20 11:39:47 +00:00
self.headline_hierarchy: List[OrgDoc] = []
self.contents: List[RawLine] = []
2020-12-05 23:26:44 +00:00
self.delimiters: List[DelimiterLine] = []
2020-06-21 19:27:40 +00:00
def finalize(self):
2020-12-20 11:39:47 +00:00
return OrgDoc(self.headlines, self.keywords, self.contents)
2020-06-21 19:27:40 +00:00
## Construction
def add_headline(self, linenum: int, match: re.Match) -> int:
# Position reader on the proper headline
stars = match.group("stars")
2020-06-27 17:20:34 +00:00
depth = len(stars)
2020-06-21 19:27:40 +00:00
headline = {
"linenum": linenum,
"orig": match,
"title": match.group("line"),
"contents": [],
"children": [],
"keywords": [],
"properties": [],
2020-11-26 22:42:39 +00:00
"logbook": [],
"structural": [],
2020-12-05 23:26:44 +00:00
"delimiters": [],
"results": [], # TODO: Move to each specific code block?
2020-06-21 19:27:40 +00:00
}
2020-06-27 17:20:34 +00:00
while (depth - 2) > len(self.headline_hierarchy):
2020-06-21 19:27:40 +00:00
# Introduce structural headlines
self.headline_hierarchy.append(None)
while depth < len(self.headline_hierarchy):
self.headline_hierarchy.pop()
2020-06-27 17:20:34 +00:00
if depth == 1:
2020-06-21 19:27:40 +00:00
self.headlines.append(headline)
else:
self.headline_hierarchy[-1]["children"].append(headline)
2020-06-21 19:27:40 +00:00
self.headline_hierarchy.append(headline)
def add_keyword_line(self, linenum: int, match: re.Match) -> int:
options = match.group("options")
kw = Keyword(
linenum,
match,
match.group("key"),
match.group("value"),
options if options is not None else "",
)
2020-06-21 19:27:40 +00:00
if len(self.headline_hierarchy) == 0:
self.keywords.append(kw)
2020-06-21 19:27:40 +00:00
else:
self.headline_hierarchy[-1]["keywords"].append(kw)
2020-06-21 19:27:40 +00:00
def add_raw_line(self, linenum: int, line: str) -> int:
raw = RawLine(linenum, line)
if len(self.headline_hierarchy) == 0:
self.contents.append(raw)
else:
self.headline_hierarchy[-1]["contents"].append(raw)
2020-06-21 19:27:40 +00:00
def add_begin_src_line(self, linenum: int, match: re.Match) -> int:
2020-12-05 23:26:44 +00:00
line = DelimiterLine(linenum, match.group(0), DelimiterLineType.BEGIN_SRC)
if len(self.headline_hierarchy) == 0:
2020-12-05 23:26:44 +00:00
self.delimiters.append(line)
else:
2020-12-05 23:26:44 +00:00
self.headline_hierarchy[-1]["delimiters"].append(line)
def add_end_src_line(self, linenum: int, match: re.Match) -> int:
2020-12-05 23:26:44 +00:00
line = DelimiterLine(linenum, match.group(0), DelimiterLineType.END_SRC)
if len(self.headline_hierarchy) == 0:
2020-12-05 23:26:44 +00:00
self.delimiters.append(line)
else:
2020-12-05 23:26:44 +00:00
self.headline_hierarchy[-1]["delimiters"].append(line)
def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
self.current_drawer = self.headline_hierarchy[-1]["properties"]
self.headline_hierarchy[-1]["structural"].append((linenum, line))
2020-06-21 19:27:40 +00:00
def add_results_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
self.current_drawer = self.headline_hierarchy[-1]["results"]
self.headline_hierarchy[-1]["structural"].append((linenum, line))
2020-11-26 22:42:39 +00:00
def add_logbook_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
self.current_drawer = self.headline_hierarchy[-1]["logbook"]
self.headline_hierarchy[-1]["structural"].append((linenum, line))
def add_drawer_end_line(self, linenum: int, line: str, match: re.Match) -> int:
2020-06-21 19:27:40 +00:00
self.current_drawer = None
self.headline_hierarchy[-1]["structural"].append((linenum, line))
2020-06-21 19:27:40 +00:00
def add_node_properties_line(self, linenum: int, match: re.Match) -> int:
key = match.group("key")
value = match.group("value").strip()
2020-06-21 19:27:40 +00:00
2021-01-17 12:04:27 +00:00
if as_time := parse_time(value):
2020-06-21 19:27:40 +00:00
value = as_time
try:
self.current_drawer.append(Property(linenum, match, key, value, None))
except:
if "current_drawer" not in dir(self): # Throw a better error on this case
raise Exception(
"Found properties before :PROPERTIES: line. Error on Org file?"
)
else:
raise # Let the exception pass
2020-06-21 19:27:40 +00:00
def read(self, s, environment):
lines = s.split("\n")
line_count = len(lines)
2020-06-21 19:27:40 +00:00
reader = enumerate(lines)
for lnum, line in reader:
linenum = lnum + 1
try:
if m := RAW_LINE_RE.match(line):
self.add_raw_line(linenum, line)
elif m := HEADLINE_RE.match(line):
self.add_headline(linenum, m)
2020-12-05 23:26:44 +00:00
# Org-babel
elif m := BEGIN_SRC_RE.match(line):
self.add_begin_src_line(linenum, m)
elif m := END_SRC_RE.match(line):
self.add_end_src_line(linenum, m)
# Generic properties
elif m := KEYWORDS_RE.match(line):
self.add_keyword_line(linenum, m)
elif m := DRAWER_START_RE.match(line):
self.add_property_drawer_line(linenum, line, m)
elif m := DRAWER_END_RE.match(line):
self.add_drawer_end_line(linenum, line, m)
elif m := RESULTS_DRAWER_RE.match(line):
self.add_results_drawer_line(linenum, line, m)
elif m := NODE_PROPERTIES_RE.match(line):
self.add_node_properties_line(linenum, m)
# Not captured
else:
self.add_raw_line(linenum, line)
except:
logging.error("Error line {}: {}".format(linenum + 1, line))
raise
2020-06-21 19:27:40 +00:00
def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True):
2020-12-20 11:39:47 +00:00
reader = OrgDocReader()
reader.read(s, environment)
doc = reader.finalize()
if extra_cautious: # Check that all options can be properly re-serialized
2020-12-20 11:39:47 +00:00
after_dump = dumps(doc)
if after_dump != s:
diff = list(
difflib.Differ().compare(
s.splitlines(keepends=True), after_dump.splitlines(keepends=True)
)
)
sys.stderr.writelines(diff)
# print("---\n" + after_dump + "\n---")
raise Exception("Difference found between existing version and dumped")
2020-12-20 11:39:47 +00:00
return doc
def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
doc = loads(f.read(), environment, extra_cautious)
doc._path = os.path.abspath(f.name)
return doc
2020-06-21 19:27:40 +00:00
def dumps(doc):
2020-10-09 22:39:32 +00:00
dump = list(doc.dump())
result = "\n".join(dump)
# print(result)
return result
def dump(doc, fp):
it = doc.dump()
# Write first line separately
line = next(it)
fp.write(line)
# Write following ones preceded by line jump
for line in it:
fp.write("\n" + line)