Compare commits
2 Commits
1d3b4c187b
...
0e5636d2f5
Author | SHA1 | Date | |
---|---|---|---|
|
0e5636d2f5 | ||
|
6476e2a511 |
34
org_rw/README.org
Normal file
34
org_rw/README.org
Normal file
@ -0,0 +1,34 @@
|
||||
* Org-rw
|
||||
|
||||
A python library to parse, modify and save Org-mode files.
|
||||
|
||||
* Goals
|
||||
- Reading org-mode files, with all the relevant information (format, dates, lists, links, metadata, ...).
|
||||
- Modify this data and write it back to disk.
|
||||
- Keep the original structure intact (indentation, spaces, format, ...).
|
||||
|
||||
** Safety mechanism
|
||||
As this library is still in early development, running it over files might
|
||||
produce unexpected changes on them. For this reason it's heavily recommended to
|
||||
have backup copies before using it on important files.
|
||||
|
||||
By default the library checks that the re-serialization of the loaded files will
|
||||
not produce any change, and throws an error in case it does. But this cannot
|
||||
guarantee that later changes to the document will not corrupt the output so be
|
||||
careful.
|
||||
|
||||
Also, see [[id:76e77f7f-c9e0-4c83-ad2f-39a5a8894a83][Known issues:Structure modifications]] for cases when the structure is
|
||||
not properly stored and can trigger this safety mechanism as a false positive.
|
||||
|
||||
* Known issues
|
||||
** Structure modifications
|
||||
:PROPERTIES:
|
||||
:ID: 76e77f7f-c9e0-4c83-ad2f-39a5a8894a83
|
||||
:END:
|
||||
- The exact format is not retained when saving dates/times. This might cause problems with the safety mechanism if you have dates in either of the forms listed below.
|
||||
Note that in both cases, doing ~C-c C-c~ on the date (from Emacs) will change it to the format that Org-rw serializes it to.
|
||||
- Use multiple dashes for hour ranges, like =<2020-12-01 10:00----11:00>=. It will get re-serialized as =<2020-12-01 10:00-11:00>=, thus triggering the safety mechanism as unexpected changes have happened.
|
||||
- Same in case hours are not two digits (with leading 0's if needed), like =<2020-12-01 9:00>=. It will get serialized as =<2020-12-01 09:00>=.
|
||||
|
||||
* Other python libraries for org-mode
|
||||
- [[https://github.com/karlicoss/orgparse][orgparse]] :: More mature, but does not provide format support or writing back to disk.
|
133
org_rw/org_rw.py
133
org_rw/org_rw.py
@ -1,3 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import difflib
|
||||
import logging
|
||||
@ -54,8 +56,8 @@ NODE_PROPERTIES_RE = re.compile(
|
||||
r"^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.+)$"
|
||||
)
|
||||
RAW_LINE_RE = re.compile(r"^\s*([^\s#:*]|$)")
|
||||
BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})( ?(?P<dow>[^ ]+))?( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?(?P<repetition> (?P<rep_mark>(\+|\+\+|\.\+|-|--))(?P<rep_value>\d+)(?P<rep_unit>[hdwmy]))?"
|
||||
CLEAN_TIME_STAMP_RE = r"\d{4}-\d{2}-\d{2}( ?([^ ]+))?( (\d{1,2}):(\d{1,2})(--(\d{1,2}):(\d{1,2}))?)?( (\+|\+\+|\.\+|-|--)\d+[hdwmy])?"
|
||||
BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})( ?(?P<dow>[^ ]+))?( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(-+(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?(?P<repetition> (?P<rep_mark>(\+|\+\+|\.\+|-|--))(?P<rep_value>\d+)(?P<rep_unit>[hdwmy]))?"
|
||||
CLEAN_TIME_STAMP_RE = r"\d{4}-\d{2}-\d{2}( ?([^ ]+))?( (\d{1,2}):(\d{1,2})(-+(\d{1,2}):(\d{1,2}))?)?( (\+|\+\+|\.\+|-|--)\d+[hdwmy])?"
|
||||
|
||||
ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
|
||||
INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))
|
||||
@ -275,7 +277,7 @@ class Headline:
|
||||
as_time_range = parse_org_time_range(start, end)
|
||||
parsed = as_time_range
|
||||
else:
|
||||
parsed = parse_org_time(time_seg)
|
||||
parsed = OrgTime.parse(time_seg)
|
||||
times.append(parsed)
|
||||
|
||||
return times
|
||||
@ -451,6 +453,9 @@ class Timestamp:
|
||||
self.minute = minute
|
||||
self.repetition = repetition
|
||||
|
||||
def to_datetime(self) -> datetime:
|
||||
return datetime(self.year, self.month, self.day, self.hour, self.minute)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, Timestamp):
|
||||
return False
|
||||
@ -550,7 +555,7 @@ def token_from_type(tok_type):
|
||||
|
||||
|
||||
class TimeRange:
|
||||
def __init__(self, start_time, end_time):
|
||||
def __init__(self, start_time: OrgTime, end_time: OrgTime):
|
||||
self.start_time = start_time
|
||||
self.end_time = end_time
|
||||
|
||||
@ -570,69 +575,76 @@ class TimeRange:
|
||||
return datetime(et.year, et.month, et.day, et.hour or 0, et.minute or 0)
|
||||
|
||||
|
||||
def parse_org_time_range(start, end):
|
||||
return TimeRange(parse_org_time(start), parse_org_time(end))
|
||||
|
||||
|
||||
def parse_org_time(value):
|
||||
if m := ACTIVE_TIME_STAMP_RE.match(value):
|
||||
active = True
|
||||
elif m := INACTIVE_TIME_STAMP_RE.match(value):
|
||||
active = False
|
||||
else:
|
||||
return None
|
||||
|
||||
if m.group("end_hour"):
|
||||
return TimeRange(
|
||||
Timestamp(
|
||||
active,
|
||||
int(m.group("year")),
|
||||
int(m.group("month")),
|
||||
int(m.group("day")),
|
||||
m.group("dow"),
|
||||
int(m.group("start_hour")),
|
||||
int(m.group("start_minute")),
|
||||
),
|
||||
Timestamp(
|
||||
active,
|
||||
int(m.group("year")),
|
||||
int(m.group("month")),
|
||||
int(m.group("day")),
|
||||
m.group("dow"),
|
||||
int(m.group("end_hour")),
|
||||
int(m.group("end_minute")),
|
||||
),
|
||||
)
|
||||
return Timestamp(
|
||||
active,
|
||||
int(m.group("year")),
|
||||
int(m.group("month")),
|
||||
int(m.group("day")),
|
||||
m.group("dow"),
|
||||
int(m.group("start_hour")) if m.group("start_hour") else None,
|
||||
int(m.group("start_minute")) if m.group("start_minute") else None,
|
||||
m.group("repetition").strip() if m.group("repetition") else None,
|
||||
)
|
||||
def parse_org_time_range(start, end) -> TimeRange:
|
||||
return TimeRange(OrgTime.parse(start), OrgTime.parse(end))
|
||||
|
||||
|
||||
class OrgTime:
|
||||
def __init__(self, ts: Timestamp):
|
||||
def __init__(self, ts: Timestamp, end_time: Union[Timestamp, None] = None):
|
||||
assert ts is not None
|
||||
self.time = ts
|
||||
self.end_time = end_time
|
||||
|
||||
def to_raw(self):
|
||||
return timestamp_to_string(self.time)
|
||||
return timestamp_to_string(self.time, self.end_time)
|
||||
|
||||
def __repr__(self):
|
||||
return f"OrgTime({self.to_raw()})"
|
||||
|
||||
@classmethod
|
||||
def parse(self, value: str) -> OrgTime:
|
||||
if m := ACTIVE_TIME_STAMP_RE.match(value):
|
||||
active = True
|
||||
elif m := INACTIVE_TIME_STAMP_RE.match(value):
|
||||
active = False
|
||||
else:
|
||||
return None
|
||||
|
||||
if m.group("end_hour"):
|
||||
return OrgTime(
|
||||
Timestamp(
|
||||
active,
|
||||
int(m.group("year")),
|
||||
int(m.group("month")),
|
||||
int(m.group("day")),
|
||||
m.group("dow"),
|
||||
int(m.group("start_hour")),
|
||||
int(m.group("start_minute")),
|
||||
),
|
||||
Timestamp(
|
||||
active,
|
||||
int(m.group("year")),
|
||||
int(m.group("month")),
|
||||
int(m.group("day")),
|
||||
m.group("dow"),
|
||||
int(m.group("end_hour")),
|
||||
int(m.group("end_minute")),
|
||||
),
|
||||
)
|
||||
|
||||
return OrgTime(
|
||||
Timestamp(
|
||||
active,
|
||||
int(m.group("year")),
|
||||
int(m.group("month")),
|
||||
int(m.group("day")),
|
||||
m.group("dow"),
|
||||
int(m.group("start_hour")) if m.group("start_hour") else None,
|
||||
int(m.group("start_minute")) if m.group("start_minute") else None,
|
||||
m.group("repetition").strip() if m.group("repetition") else None,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def time_from_str(s: str):
|
||||
return OrgTime(parse_org_time(s))
|
||||
def time_from_str(s: str) -> OrgTime:
|
||||
return OrgTime.parse(s)
|
||||
|
||||
|
||||
def timerange_to_string(tr: TimeRange):
|
||||
return timestamp_to_string(tr.start_time) + "--" + timestamp_to_string(tr.end_time)
|
||||
return tr.start_time.to_raw() + "--" + tr.end_time.to_raw()
|
||||
|
||||
|
||||
def timestamp_to_string(ts: Timestamp):
|
||||
def timestamp_to_string(ts: Timestamp, end_time: Union[Timestamp, None] = None) -> str:
|
||||
date = "{year}-{month:02d}-{day:02d}".format(
|
||||
year=ts.year, month=ts.month, day=ts.day
|
||||
)
|
||||
@ -646,6 +658,13 @@ def timestamp_to_string(ts: Timestamp):
|
||||
else:
|
||||
base = date
|
||||
|
||||
if end_time is not None:
|
||||
assert end_time.hour is not None
|
||||
assert end_time.minute is not None
|
||||
base = "{base}-{hour:02}:{minute:02d}".format(
|
||||
base=base, hour=end_time.hour, minute=end_time.minute
|
||||
)
|
||||
|
||||
if ts.repetition:
|
||||
base = base + " " + ts.repetition
|
||||
|
||||
@ -1164,10 +1183,10 @@ class OrgDoc:
|
||||
if plus is None:
|
||||
plus = ""
|
||||
|
||||
if isinstance(prop.value, Timestamp):
|
||||
value = timestamp_to_string(prop.value)
|
||||
elif isinstance(prop.value, TimeRange):
|
||||
if isinstance(prop.value, TimeRange):
|
||||
value = timerange_to_string(prop.value)
|
||||
elif isinstance(prop.value, OrgTime):
|
||||
value = prop.value.to_raw()
|
||||
else:
|
||||
value = prop.value
|
||||
|
||||
@ -1392,7 +1411,7 @@ class OrgDocReader:
|
||||
value = as_time_range
|
||||
else:
|
||||
raise Exception("Unknown time range format: {}".format(value))
|
||||
elif as_time := parse_org_time(value):
|
||||
elif as_time := OrgTime.parse(value):
|
||||
value = as_time
|
||||
|
||||
try:
|
||||
|
@ -8,6 +8,7 @@ SCHEDULED: <2020-12-12 Sáb> CLOSED: <2020-12-13 Dom> DEADLINE: <2020-12-14 Lun>
|
||||
:JUST_DAY: [2020-12-10]
|
||||
:DAY_AND_WEEKDAY: [2020-12-10 Xov]
|
||||
:DAY_AND_HOUR: [2020-12-10 Xov 00:02]
|
||||
:DAY_AND_HOUR_HOUR_RANGE: [2020-12-10 Xov 00:02]
|
||||
:JUST_DAY_TIME_RANGE: [2020-12-10]--[2020-12-11]
|
||||
:JUST_DAY_TIME_RANGE_NEGATIVE: [2020-12-11]--[2020-12-10]
|
||||
:DAY_AND_WEEKDAY_TIME_RANGE: [2020-12-10 Xov]--[2020-12-11 Ven]
|
||||
@ -15,3 +16,6 @@ SCHEDULED: <2020-12-12 Sáb> CLOSED: <2020-12-13 Dom> DEADLINE: <2020-12-14 Lun>
|
||||
:DAY_AND_HOUR_TIME_RANGE: [2020-12-10 00:02]--[2020-12-11 00:30]
|
||||
:DAY_AND_HOUR_TIME_RANGE_NEGATIVE: [2020-12-10 00:30]--[2020-12-11 00:02]
|
||||
:END:
|
||||
|
||||
** Scheduled for time range
|
||||
SCHEDULED: <2020-12-15 Mar 00:05-00:10>
|
||||
|
@ -409,6 +409,15 @@ class TestSerde(unittest.TestCase):
|
||||
hl.deadline.time, Timestamp(True, 2020, 12, 14, "Lun", None, None)
|
||||
)
|
||||
|
||||
hl_schedule_range = hl.children[0]
|
||||
self.assertEqual(
|
||||
hl_schedule_range.scheduled.time, Timestamp(True, 2020, 12, 15, "Mar", 0, 5)
|
||||
)
|
||||
self.assertEqual(
|
||||
hl_schedule_range.scheduled.end_time,
|
||||
Timestamp(True, 2020, 12, 15, "Mar", 0, 10),
|
||||
)
|
||||
|
||||
def test_update_info_file_05(self):
|
||||
with open(os.path.join(DIR, "05-dates.org")) as f:
|
||||
orig = f.read()
|
||||
|
@ -7,7 +7,7 @@ from org_rw import (Bold, Code, Italic, Line, Strike, Text, Underlined,
|
||||
|
||||
|
||||
def timestamp_to_datetime(ts):
|
||||
return datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)
|
||||
return ts.time.to_datetime()
|
||||
|
||||
|
||||
def get_raw(doc):
|
||||
|
Loading…
Reference in New Issue
Block a user