forked from kenkeiras/org-rw
Format with black, use tokens for markup segmentation.
- Don't use trees in the first instance, as interleaving might be lossy.
commit e73ce5d480
parent f6de69fd90
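The motivation stated in the commit message is that interleaved markup (one span closing inside another, as in the new interleaved line added to 02-markup.org below) cannot be represented faithfully by a strictly nested tree, so the parser first emits a flat stream of marker tokens. The sketch below is only an illustration of that idea under stated assumptions, not the library code: the function name `naive_tokenize` and its simplified toggle logic are made up for this example, while the marker characters mirror the MARKERS table introduced in the diff (the real `tokenize_contents` additionally checks pre/post/border characters).

```python
# Illustrative sketch only (not the org-rw code): a flat token stream can
# represent interleaved Org markup that a strict nesting tree cannot.
MARKERS = {"*": "BOLD", "=": "VERBATIM", "/": "ITALIC",
           "+": "STRIKE", "_": "UNDERLINED", "~": "CODE"}


def naive_tokenize(text):
    """Split text into TEXT chunks and (kind, closing) marker tokens.

    Toy version: every marker character simply toggles its mode on/off.
    """
    tokens = []
    open_modes = set()
    buff = []
    for ch in text:
        if ch in MARKERS:
            if buff:
                tokens.append(("TEXT", "".join(buff)))
                buff = []
            kind = MARKERS[ch]
            closing = kind in open_modes          # already open -> this closes it
            open_modes.symmetric_difference_update({kind})  # toggle the mode
            tokens.append(("MARKER", kind, closing))
        else:
            buff.append(ch)
    if buff:
        tokens.append(("TEXT", "".join(buff)))
    return tokens


# Interleaved markup: bold opens before verbatim and also closes before it.
# A tree would have to break one of the two spans; the token list keeps both.
print(naive_tokenize("a *bold =both* still verbatim= plain"))
```

The same flat-list shape is what the new `parse_contents` returns (a single `Text` whose contents mix strings and `MarkerToken` instances), which is why re-serialization stays lossless.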
@@ -1,11 +1,12 @@
+import collections
 import logging
 import re
-import collections
+from enum import Enum
 from typing import List, Tuple

 BASE_ENVIRONMENT = {
-    'org-footnote-section': 'Footnotes',
-    'org-options-keywords': (
+    "org-footnote-section": "Footnotes",
+    "org-options-keywords": (
         "ARCHIVE:",
         "AUTHOR:",
         "BIND:",
@@ -30,52 +31,103 @@ BASE_ENVIRONMENT = {
         "SEQ_TODO:",
         "SETUPFILE:",
         "STARTUP:",
-        "TAGS:"
-        "TITLE:",
+        "TAGS:" "TITLE:",
         "TODO:",
         "TYP_TODO:",
         "SELECT_TAGS:",
-        "EXCLUDE_TAGS:"
+        "EXCLUDE_TAGS:",
     ),
 }


-HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
-KEYWORDS_RE = re.compile(r'^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$')
-PROPERTY_DRAWER_RE = re.compile(r'^(?P<indentation>\s*):PROPERTIES:(?P<end_indentation>\s*)$')
-DRAWER_END_RE = re.compile(r'^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$')
-NODE_PROPERTIES_RE = re.compile(r'^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$')
-RAW_LINE_RE = re.compile(r'^\s*([^\s#:*]|$)')
-BASE_TIME_STAMP_RE = r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[^ ]+)( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?'
+HEADLINE_RE = re.compile(r"^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$")
+KEYWORDS_RE = re.compile(
+    r"^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)(\[(?P<options>[^\]]*)\])?:(?P<spacing>\s*)(?P<value>.*)$"
+)
+PROPERTY_DRAWER_RE = re.compile(
+    r"^(?P<indentation>\s*):PROPERTIES:(?P<end_indentation>\s*)$"
+)
+DRAWER_END_RE = re.compile(r"^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$")
+NODE_PROPERTIES_RE = re.compile(
+    r"^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$"
+)
+RAW_LINE_RE = re.compile(r"^\s*([^\s#:*]|$)")
+BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[^ ]+)( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?"

-ACTIVE_TIME_STAMP_RE = re.compile(r'<{}>'.format(BASE_TIME_STAMP_RE))
-INACTIVE_TIME_STAMP_RE = re.compile(r'\[{}\]'.format(BASE_TIME_STAMP_RE))
+ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
+INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))

 # BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
 #                       r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')

-Headline = collections.namedtuple('Headline', ('start_line', 'depth',
-                                                'orig',
-                                                'properties', 'keywords',
-                                                'priority_start', 'priority',
-                                                'title_start', 'title',
-                                                'tags_start', 'tags',
-                                                'contents',
-                                                'children',
-                                                'structural',
-                                                ))
+Headline = collections.namedtuple(
+    "Headline",
+    (
+        "start_line",
+        "depth",
+        "orig",
+        "properties",
+        "keywords",
+        "priority_start",
+        "priority",
+        "title_start",
+        "title",
+        "tags_start",
+        "tags",
+        "contents",
+        "children",
+        "structural",
+    ),
+)

-RawLine = collections.namedtuple('RawLine', ('linenum', 'line'))
-Keyword = collections.namedtuple('Keyword', ('linenum', 'match', 'key', 'value', 'options'))
-Property = collections.namedtuple('Property', ('linenum', 'match', 'key', 'value', 'options'))
+RawLine = collections.namedtuple("RawLine", ("linenum", "line"))
+Keyword = collections.namedtuple(
+    "Keyword", ("linenum", "match", "key", "value", "options")
+)
+Property = collections.namedtuple(
+    "Property", ("linenum", "match", "key", "value", "options")
+)

 # @TODO How are [YYYY-MM-DD HH:mm--HH:mm] and ([... HH:mm]--[... HH:mm]) differentiated ?
 # @TODO Consider recurrence annotations
-TimeRange = collections.namedtuple('TimeRange', ('start_time', 'end_time'))
-Timestamp = collections.namedtuple('Timestamp', ('active', 'year', 'month', 'day', 'dow', 'hour', 'minute'))
+TimeRange = collections.namedtuple("TimeRange", ("start_time", "end_time"))
+Timestamp = collections.namedtuple(
+    "Timestamp", ("active", "year", "month", "day", "dow", "hour", "minute")
+)


+class MarkerType(Enum):
+    NO_MODE = 0b0
+    BOLD_MODE = 0b1
+    CODE_MODE = 0b10
+    ITALIC_MODE = 0b100
+    STRIKE_MODE = 0b1000
+    UNDERLINED_MODE = 0b10000
+    VERBATIM_MODE = 0b100000
+
+
+MARKERS = {
+    "*": MarkerType.BOLD_MODE,
+    "~": MarkerType.CODE_MODE,
+    "/": MarkerType.ITALIC_MODE,
+    "+": MarkerType.STRIKE_MODE,
+    "_": MarkerType.UNDERLINED_MODE,
+    "=": MarkerType.VERBATIM_MODE,
+}
+
+ModeToMarker = {}
+
+for tok, mode in MARKERS.items():
+    ModeToMarker[mode] = tok
+
+MarkerToken = collections.namedtuple("MarkerToken", ("closing", "tok_type"))
+
+BEGIN_PROPERTIES = "OPEN_PROPERTIES"
+END_PROPERTIES = "CLOSE_PROPERTIES"
+
+
+def token_from_type(tok_type):
+    print(ModeToMarker, tok_type)
+    return ModeToMarker[tok_type]
+
-BEGIN_PROPERTIES = 'OPEN_PROPERTIES'
-END_PROPERTIES = 'CLOSE_PROPERTIES'

 def parse_org_time(value):
     if m := ACTIVE_TIME_STAMP_RE.match(value):
@@ -85,29 +137,57 @@ def parse_org_time(value):
     else:
         return None

-    if m.group('end_hour'):
-        return TimeRange(Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute'))),
-                         Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('end_hour')), int(m.group('end_minute'))))
-    return Timestamp(active, int(m.group('year')), int(m.group('month')), int(m.group('day')), m.group('dow'), int(m.group('start_hour')), int(m.group('start_minute')))
+    if m.group("end_hour"):
+        return TimeRange(
+            Timestamp(
+                active,
+                int(m.group("year")),
+                int(m.group("month")),
+                int(m.group("day")),
+                m.group("dow"),
+                int(m.group("start_hour")),
+                int(m.group("start_minute")),
+            ),
+            Timestamp(
+                active,
+                int(m.group("year")),
+                int(m.group("month")),
+                int(m.group("day")),
+                m.group("dow"),
+                int(m.group("end_hour")),
+                int(m.group("end_minute")),
+            ),
+        )
+    return Timestamp(
+        active,
+        int(m.group("year")),
+        int(m.group("month")),
+        int(m.group("day")),
+        m.group("dow"),
+        int(m.group("start_hour")),
+        int(m.group("start_minute")),
+    )


 def timestamp_to_string(ts):
-    date = '{year}-{month:02d}-{day:02d}'.format(
-        year=ts.year,
-        month=ts.month,
-        day=ts.day
+    date = "{year}-{month:02d}-{day:02d}".format(
+        year=ts.year, month=ts.month, day=ts.day
     )
     if ts.dow:
-        date = date + ' ' + ts.dow
+        date = date + " " + ts.dow

     if ts.hour is not None:
-        base = '{date} {hour:02}:{minute:02d}'.format(date=date, hour=ts.hour, minute=ts.minute)
+        base = "{date} {hour:02}:{minute:02d}".format(
+            date=date, hour=ts.hour, minute=ts.minute
+        )
     else:
         base = date

     if ts.active:
-        return '<{}>'.format(base)
+        return "<{}>".format(base)
     else:
-        return '[{}]'.format(base)
+        return "[{}]".format(base)


 def get_raw(doc):
     if isinstance(doc, str):
@@ -115,6 +195,7 @@ def get_raw(doc):
     else:
         return doc.get_raw()

+
 class Line:
     def __init__(self, linenum, contents):
         self.linenum = linenum
@@ -127,7 +208,8 @@ class Line:
                 rawchunks.append(chunk)
             else:
                 rawchunks.append(chunk.get_raw())
-        return ''.join(rawchunks) + '\n'
+        return "".join(rawchunks) + "\n"

+
 class Text:
     def __init__(self, contents, line):
@@ -135,104 +217,122 @@ class Text:
         self.linenum = line

     def get_raw(self):
-        raw = ''.join(self.contents)
-        return raw
+        contents = []
+        for chunk in self.contents:
+            if isinstance(chunk, str):
+                contents.append(chunk)
+            else:
+                assert isinstance(chunk, MarkerToken)
+                contents.append(token_from_type(chunk.tok_type))
+        return ''.join(contents)


 class Bold:
-    Marker = '*'
+    Marker = "*"

     def __init__(self, contents, line):
         self.contents = contents

     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"


 class Code:
-    Marker = '~'
+    Marker = "~"

     def __init__(self, contents, line):
         self.contents = contents

     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"


 class Italic:
-    Marker = '/'
+    Marker = "/"

     def __init__(self, contents, line):
         self.contents = contents

     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"


 class Strike:
-    Marker = '+'
+    Marker = "+"

     def __init__(self, contents, line):
         self.contents = contents

     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"


 class Underlined:
-    Marker = '_'
+    Marker = "_"

     def __init__(self, contents, line):
         self.contents = contents

     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"


 class Verbatim:
-    Marker = '='
+    Marker = "="

     def __init__(self, contents, line):
         self.contents = contents

     def get_raw(self):
-        raw = ''.join(map(get_raw, self.contents))
+        raw = "".join(map(get_raw, self.contents))
         return f"{self.Marker}{raw}{self.Marker}"


 def is_pre(char: str) -> bool:
     if isinstance(char, str):
-        return char in '\n\r\t -({\'"'
+        return char in "\n\r\t -({'\""
     else:
         return True


 def is_marker(char: str) -> bool:
     if isinstance(char, str):
-        return char in '*=/+_~'
+        return char in "*=/+_~"
     else:
         return False


 def is_border(char: str) -> bool:
     if isinstance(char, str):
-        return char not in '\n\r\t '
+        return char not in "\n\r\t "
     else:
         return False


 def is_body(char: str) -> bool:
     if isinstance(char, str):
         return True
     else:
         return False


 def is_post(char: str) -> bool:
     if isinstance(char, str):
-        return char in '-.,;:!?\')}["'
+        return char in "-.,;:!?')}[\""
     else:
         return False


 TOKEN_TYPE_TEXT = 0
 TOKEN_TYPE_OPEN_MARKER = 1
 TOKEN_TYPE_CLOSE_MARKER = 2


 def tokenize_contents(contents: str):
     tokens = []
     last_char = None
@@ -244,17 +344,17 @@ def tokenize_contents(contents: str):
         has_changed = False

         if (
             (i not in closes)
             and is_marker(char)
             and is_pre(last_char)
-            and ((i + 1 < len(contents))
-                 and is_border(contents[i + 1]))):
+            and ((i + 1 < len(contents)) and is_border(contents[i + 1]))
+        ):

             is_valid_mark = False
             # Check that is closed later
             text_in_line = True
             for j in range(i, len(contents) - 1):
-                if contents[j] == '\n':
+                if contents[j] == "\n":
                     if not text_in_line:
                         break
                     text_in_line = False
@@ -267,13 +367,13 @@

             if is_valid_mark:
                 if len(text) > 0:
-                    tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
+                    tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
                     text = []
                 tokens.append((TOKEN_TYPE_OPEN_MARKER, char))
                 has_changed = True
         elif i in closes:
             if len(text) > 0:
-                tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
+                tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
                 text = []
             tokens.append((TOKEN_TYPE_CLOSE_MARKER, char))
             has_changed = True
@@ -283,156 +383,57 @@
         last_char = char

     if len(text) > 0:
-        tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
+        tokens.append((TOKEN_TYPE_TEXT, "".join(text)))

     return tokens


-def parse_contents(raw_contents:List[RawLine]):
-    NO_MODE = 0b0
-    BOLD_MODE = 0b1
-    CODE_MODE = 0b10
-    ITALIC_MODE = 0b100
-    STRIKE_MODE = 0b1000
-    UNDERLINED_MODE = 0b10000
-    VERBATIM_MODE = 0b100000
-
-    MARKERS = {
-        '*': BOLD_MODE,
-        '~': CODE_MODE,
-        '/': ITALIC_MODE,
-        '+': STRIKE_MODE,
-        '_': UNDERLINED_MODE,
-        '=': VERBATIM_MODE,
-    }
-    MODES = (
-        (BOLD_MODE, Bold),
-        (CODE_MODE, Code),
-        (ITALIC_MODE, Italic),
-        (STRIKE_MODE, Strike),
-        (UNDERLINED_MODE, Underlined),
-        (VERBATIM_MODE, Verbatim),
-    )
-    _MODES = {
-        BOLD_MODE: Bold,
-        CODE_MODE: Code,
-        ITALIC_MODE: Italic,
-        STRIKE_MODE: Strike,
-        UNDERLINED_MODE: Underlined,
-        VERBATIM_MODE: Verbatim,
-    }
-
-    mode = NO_MODE
-    escaped = False
-
-    chunk = []
-    inline = []
-    chunks = []
-
-    linenum = start_linenum = raw_contents[0].linenum
+def parse_contents(raw_contents: List[RawLine]):
     contents_buff = []
     for line in raw_contents:
         contents_buff.append(line.line)

-    contents = '\n'.join(contents_buff)
+    contents = "\n".join(contents_buff)
     tokens = tokenize_contents(contents)
+    current_line = raw_contents[0].linenum

-    # Use tokens to tag chunks of text with it's container type
-    for (tok_type, tok_val) in tokens:
-        if tok_type == TOKEN_TYPE_TEXT:
-            chunks.append((mode, tok_val))
-        elif tok_type == TOKEN_TYPE_OPEN_MARKER:
-            mode = mode | MARKERS[tok_val]
-        elif tok_type == TOKEN_TYPE_OPEN_MARKER:
-            mode = mode ^ MARKERS[tok_val]
-
-    # Convert those chunks to a tree
-    def tree_for_tag(tag, in_mode):
-        tree = []
-        for (mask, mode) in MODES:
-            if (mask & tag) and not (mask & in_mode):
-                tree.append(mode)
-        print(tree)
-        if len(tree) == 0:
-            return Text
-
-    if len(raw_contents) > 0:
-        current_line = raw_contents[0].linenum
-
-    # tree = []
-    # pos = []
-    # print('\n'.join(map(str, chunks)))
-    # for (tag, chunk) in chunks:
-    #     if pos == []:
-    #         tree.append(tree_for_tag(tag, NO_MODE)(chunk, line=current_line))
-    #         pos.append(tree[-1])
-    #     else:
-    #         raise NotImplementedError()
-
-    #     current_line += chunk.count('\n')
-
-    tree = []
-    mode_tree = []
     contents = []
     # Use tokens to tag chunks of text with it's container type
     for (tok_type, tok_val) in tokens:
         if tok_type == TOKEN_TYPE_TEXT:
-            if len(mode_tree) == 0:
-                tree.append(Text(tok_val, current_line))
-            else:
-                contents[-1].append(tok_val)
-
-            current_line += chunk.count('\n')
-
+            contents.append(tok_val)
         elif tok_type == TOKEN_TYPE_OPEN_MARKER:
-            mode_tree.append(_MODES[MARKERS[tok_val]])
-            contents.append([])
-
+            contents.append(MarkerToken(False, MARKERS[tok_val]))
         elif tok_type == TOKEN_TYPE_CLOSE_MARKER:
-            mode = _MODES[MARKERS[tok_val]]
-            matching_mode = mode_tree.pop()
-            assert mode == matching_mode
-            value = mode(contents.pop(), current_line)
-            current_line += chunk.count('\n')
-
-            if len(mode_tree) == 0:  # Closed branch of tree
-                tree.append(value)
-            else:
-                print("{} <- {}".format(mode_tree[-1], mode))
-                contents[-1].append(value)
-
-            current_line += chunk.count('\n')
-
-    if len(tree) > 3:
-        print("L", len(tree))
-        print("F:", tree)
-    return tree
+            contents.append(MarkerToken(True, MARKERS[tok_val]))
+
+    return [Text(contents, current_line)]


 def parse_headline(hl) -> Headline:
-    stars = hl['orig'].group('stars')
+    stars = hl["orig"].group("stars")
     depth = len(stars)

     # TODO: Parse line for priority, cookies and tags
-    line = hl['orig'].group('line')
+    line = hl["orig"].group("line")
     title = line.strip()
-    contents = parse_contents(hl['contents'])
+    contents = parse_contents(hl["contents"])

-    return Headline(start_line=hl['linenum'],
-                    depth=depth,
-                    orig=hl['orig'],
-                    title=title,
-                    contents=contents,
-                    children=[parse_headline(child) for child in hl['children']],
-                    keywords=hl['keywords'],
-                    properties=hl['properties'],
-                    structural=hl['structural'],
-                    title_start=None,
-                    priority=None,
-                    priority_start=None,
-                    tags_start=None,
-                    tags=None,
+    return Headline(
+        start_line=hl["linenum"],
+        depth=depth,
+        orig=hl["orig"],
+        title=title,
+        contents=contents,
+        children=[parse_headline(child) for child in hl["children"]],
+        keywords=hl["keywords"],
+        properties=hl["properties"],
+        structural=hl["structural"],
+        title_start=None,
+        priority=None,
+        priority_start=None,
+        tags_start=None,
+        tags=None,
     )

@@ -454,35 +455,41 @@ class OrgDom:

     # Writing
     def dump_kw(self, kw):
-        options = kw.match.group('options')
+        options = kw.match.group("options")
         if not options:
-            options = ''
+            options = ""

-        return (kw.linenum,
-                '{indentation}#+{key}{options}:{spacing}{value}'.format(
-                    indentation=kw.match.group('indentation'),
-                    key=kw.key,
-                    options=kw.options,
-                    spacing=kw.match.group('spacing'),
-                    value=kw.value,
-                ))
+        return (
+            kw.linenum,
+            "{indentation}#+{key}{options}:{spacing}{value}".format(
+                indentation=kw.match.group("indentation"),
+                key=kw.key,
+                options=kw.options,
+                spacing=kw.match.group("spacing"),
+                value=kw.value,
+            ),
+        )

     def dump_property(self, prop: Property):
-        plus = prop.match.group('plus')
-        if plus is None: plus = ''
+        plus = prop.match.group("plus")
+        if plus is None:
+            plus = ""

         if isinstance(prop.value, Timestamp):
             value = timestamp_to_string(prop.value)
         else:
             value = prop.value

-        return (prop.linenum, '{indentation}:{key}{plus}:{spacing}{value}'.format(
-            indentation=prop.match.group('indentation'),
-            key=prop.key,
-            plus=plus,
-            spacing=prop.match.group('spacing'),
-            value=value,
-        ))
+        return (
+            prop.linenum,
+            "{indentation}:{key}{plus}:{spacing}{value}".format(
+                indentation=prop.match.group("indentation"),
+                key=prop.key,
+                plus=plus,
+                spacing=prop.match.group("spacing"),
+                value=value,
+            ),
+        )

     def dump_contents(self, raw):
         if isinstance(raw, RawLine):
@@ -494,7 +501,9 @@ class OrgDom:
         return (structural[0], structural[1])

     def dump_headline(self, headline):
-        yield '*' * headline.depth + ' ' + headline.orig.group('spacing') + headline.title
+        yield "*" * headline.depth + " " + headline.orig.group(
+            "spacing"
+        ) + headline.title

         lines = []
         KW_T = 0
@@ -523,21 +532,31 @@

             if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
                 # No structural opening
-                structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
-                logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
-            elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
+                structured_lines.append(" " * content.index(":") + ":PROPERTIES:\n")
+                logging.warning(
+                    "Added structural: ".format(
+                        line[1][0], structured_lines[-1].strip()
+                    )
+                )
+            elif (
+                ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T
+            ):
                 # No structural closing
                 last_line = lines[i - 1][1][1]
-                structured_lines.append(' ' * last_line.index(':') + ':END:\n')
-                logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
+                structured_lines.append(" " * last_line.index(":") + ":END:\n")
+                logging.warning(
+                    "Added structural:{}: {}".format(
+                        line[1][0], structured_lines[-1].strip()
+                    )
+                )

             elif ltype != CONTENT_T:
-                content = content + '\n'
+                content = content + "\n"

             last_type = ltype
             structured_lines.append(content)

-        yield ''.join(structured_lines)
+        yield "".join(structured_lines)

         for child in headline.children:
             yield from self.dump_headline(child)
@@ -555,8 +574,8 @@ class OrgDom:
         for headline in self.headlines:
             yield from self.dump_headline(headline)

-class OrgDomReader:

+class OrgDomReader:
     def __init__(self):
         self.headlines: List[Headline] = []
         self.keywords: List[Property] = []
@@ -569,18 +588,18 @@ class OrgDomReader:
     ## Construction
     def add_headline(self, linenum: int, match: re.Match) -> int:
         # Position reader on the proper headline
-        stars = match.group('stars')
+        stars = match.group("stars")
         depth = len(stars)

         headline = {
-            'linenum': linenum,
-            'orig': match,
-            'title': match.group('line'),
-            'contents': [],
-            'children': [],
-            'keywords': [],
-            'properties': [],
-            'structural': [],
+            "linenum": linenum,
+            "orig": match,
+            "title": match.group("line"),
+            "contents": [],
+            "children": [],
+            "keywords": [],
+            "properties": [],
+            "structural": [],
         }

         while (depth - 2) > len(self.headline_hierarchy):
@@ -592,41 +611,46 @@ class OrgDomReader:
         if depth == 1:
             self.headlines.append(headline)
         else:
-            self.headline_hierarchy[-1]['children'].append(headline)
+            self.headline_hierarchy[-1]["children"].append(headline)
         self.headline_hierarchy.append(headline)


     def add_keyword_line(self, linenum: int, match: re.Match) -> int:
-        options = match.group('options')
-        kw = Keyword(linenum, match, match.group('key'), match.group('value'), options if options is not None else '')
+        options = match.group("options")
+        kw = Keyword(
+            linenum,
+            match,
+            match.group("key"),
+            match.group("value"),
+            options if options is not None else "",
+        )
         if len(self.headline_hierarchy) == 0:
             self.keywords.append(kw)
         else:
-            self.headline_hierarchy[-1]['keywords'].append(kw)
+            self.headline_hierarchy[-1]["keywords"].append(kw)

     def add_raw_line(self, linenum: int, line: str) -> int:
         raw = RawLine(linenum, line)
         if len(self.headline_hierarchy) == 0:
             self.contents.append(raw)
         else:
-            self.headline_hierarchy[-1]['contents'].append(raw)
+            self.headline_hierarchy[-1]["contents"].append(raw)

     def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
-        self.current_drawer = self.headline_hierarchy[-1]['properties']
-        self.headline_hierarchy[-1]['structural'].append((linenum, line))
+        self.current_drawer = self.headline_hierarchy[-1]["properties"]
+        self.headline_hierarchy[-1]["structural"].append((linenum, line))

     def add_drawer_end_line(self, linenum: int, line: str, match: re.Match) -> int:
         self.current_drawer = None
-        self.headline_hierarchy[-1]['structural'].append((linenum, line))
+        self.headline_hierarchy[-1]["structural"].append((linenum, line))

     def add_node_properties_line(self, linenum: int, match: re.Match) -> int:
-        key = match.group('key')
-        value = match.group('value').strip()
+        key = match.group("key")
+        value = match.group("value").strip()

-        if (value.count('>--<') == 1) or (value.count(']--[') == 1):
+        if (value.count(">--<") == 1) or (value.count("]--[") == 1):
             # Time ranges with two different dates
             # @TODO properly consider "=> DURATION" section
-            chunks = value.split('=').split('--')
+            chunks = value.split("=").split("--")
             as_time_range = parse_org_time(chunks[0], chunks[1])
             if (as_time_range[0] is not None) and (as_time_range[1] is not None):
                 value = TimeRange(as_time_range[0], as_time_range[1])
@@ -636,7 +660,7 @@ class OrgDomReader:
         self.current_drawer.append(Property(linenum, match, key, value, None))

     def read(self, s, environment):
-        lines = s.split('\n')
+        lines = s.split("\n")
         reader = enumerate(lines)

         for linenum, line in reader:
@@ -653,7 +677,7 @@ class OrgDomReader:
             elif m := NODE_PROPERTIES_RE.match(line):
                 self.add_node_properties_line(linenum, m)
             else:
-                raise NotImplementedError('{}: ‘{}’'.format(linenum, line))
+                raise NotImplementedError("{}: ‘{}’".format(linenum, line))


 def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=False):
@@ -662,7 +686,9 @@ def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=False):
     dom = doc.finalize()
     if extra_cautious:  # Check that all options can be properly re-serialized
         if dumps(dom) != s:
-            raise NotImplementedError("Error re-serializing, file uses something not implemented")
+            raise NotImplementedError(
+                "Error re-serializing, file uses something not implemented"
+            )
     return dom


@@ -672,6 +698,6 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):

 def dumps(doc):
     dump = list(doc.dump())
-    result = '\n'.join(dump)
+    result = "\n".join(dump)
     print(result)
     return result
@@ -1,4 +1,6 @@
-from .org_dom import Headline, Line, RawLine, Text, Bold, Code, Italic, Strike, Underlined, Verbatim
+from .org_dom import (Bold, Code, Headline, Italic, Line, RawLine, Strike,
+                      Text, Underlined, Verbatim)


 def get_hl_raw_contents(doc: Headline) -> str:
     lines = []
@@ -22,6 +22,8 @@

This is a nested *bold =verbatim /italic +strike _underlined ~code .~ ._ .+ ./ .= .*

+This is a interleaved *bold =verbatim /italic +strike _underlined ~code .* .= ./ .+ ._ .~
+
This is a _ non-underlined phrase because an incorrectly placed content _.

This is a _ non-underlined phrase because an incorrectly placed content beginning_.
@@ -5,7 +5,7 @@ from datetime import datetime as DT

 from org_dom import dumps, load, loads
 from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE,
-                                  UNDERLINED, VERBATIM, WEB_LINK, Dom,)
+                                  UNDERLINED, VERBATIM, WEB_LINK, Dom, Tokens)

 DIR = os.path.dirname(os.path.abspath(__file__))

@@ -47,7 +47,7 @@ class TestSerde(unittest.TestCase):
         self.assertEqual(dumps(doc), orig)

     def test_markup_file_02(self):
-        self.maxDiff = 1024
+        self.maxDiff = 10000
         with open(os.path.join(DIR, '02-markup.org')) as f:
             doc = load(f)

@@ -82,11 +82,33 @@ class TestSerde(unittest.TestCase):
                 SPAN(" This is a nested ", BOLD(["bold ", VERBATIM(["verbatim ", ITALIC(["italic ", STRIKE(["strike ", UNDERLINED(["underlined ", CODE("code ."), " ."]), " ."]), " ."]), " ."]), " ."])),
                 SPAN("\n"),

-                # SPAN(""),
-                # # TODO: THIS IS INTERLEAVED, not nested
-                # In ORG: This is a interleaved *bold =verbatim /italic +strike _underlined ~code .* .= ./ .+ ._ .~
-                # SPAN(" This is a nested ", BOLD(["bold ", VERBATIM(["verbatim ", ITALIC(["italic ", STRIKE(["strike ", UNDERLINED(["underlined ", CODE("code ."), " ."]), " ."]), " ."]), " ."]), " ."])),
-                # SPAN(""),
+                SPAN("\n"),
+                # THIS IS INTERLEAVED, not nested
+                SPAN([" This is a interleaved ",
+                      Tokens.BOLD_START,
+                      "bold ",
+                      Tokens.VERBATIM_START,
+                      "verbatim ",
+                      Tokens.ITALIC_START,
+                      "italic ",
+                      Tokens.STRIKE_START,
+                      "strike ",
+                      Tokens.UNDERLINED_START,
+                      "underlined ",
+                      Tokens.CODE_START,
+                      "code .",
+                      Tokens.BOLD_END,
+                      " .",
+                      Tokens.VERBATIM_END,
+                      " .",
+                      Tokens.ITALIC_END,
+                      " .",
+                      Tokens.STRIKE_END,
+                      " .",
+                      Tokens.UNDERLINED_END,
+                      " .",
+                      Tokens.CODE_END,
+                      "\n"]),

                 SPAN("\n"),
                 SPAN(" This is a _ non-underlined phrase because an incorrectly placed content _.\n"),
@@ -2,7 +2,8 @@ import collections
 import unittest
 from datetime import datetime

-from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents
+from org_dom import (Bold, Code, Italic, Line, Strike, Text, Underlined,
+                     Verbatim, get_raw_contents)


 def timestamp_to_datetime(ts):
@@ -13,7 +14,7 @@ def get_raw(doc):
     if isinstance(doc, str):
         return doc
     elif isinstance(doc, list):
-        return ''.join([get_raw(e) for e in doc])
+        return "".join([get_raw(e) for e in doc])
     else:
         return doc.get_raw()

@@ -44,8 +45,7 @@ class Dom:
             test_case.assertEqual(len(doc.getTopHeadlines()), 0, "Top")
         else:
             doc_headlines = doc.getTopHeadlines()
-            test_case.assertEqual(len(doc_headlines), len(self.children),
-                                  "Top")
+            test_case.assertEqual(len(doc_headlines), len(self.children), "Top")

             for i, children in enumerate(self.children):
                 children.assert_matches(test_case, doc_headlines[i])
@@ -72,7 +72,8 @@ class HL:
             test_case.assertEqual(doc_props[i].key, prop[0])
             if isinstance(prop[1], datetime):
                 test_case.assertEqual(
-                    timestamp_to_datetime(doc_props[i].value), prop[1])
+                    timestamp_to_datetime(doc_props[i].value), prop[1]
+                )

         test_case.assertEqual(get_raw_contents(doc), self.get_raw())

@@ -81,14 +82,13 @@ class HL:
             test_case.assertEqual(len(doc.children), 0)
         else:
             doc_headlines = doc.children
-            test_case.assertEqual(len(doc_headlines), len(self.children),
-                                  self.title)
+            test_case.assertEqual(len(doc_headlines), len(self.children), self.title)

             for i, children in enumerate(self.children):
                 children.assert_matches(test_case, doc_headlines[i])

     def get_raw(self):
-        return ''.join(map(get_raw, self.content))
+        return "".join(map(get_raw, self.content))


 class SPAN:
@@ -100,10 +100,16 @@ class SPAN:
         for section in self.contents:
             if isinstance(section, str):
                 chunks.append(section)
+            elif isinstance(section, list):
+                for subsection in section:
+                    if isinstance(subsection, str):
+                        chunks.append(subsection)
+                    else:
+                        chunks.append(subsection.get_raw())
             else:
                 chunks.append(section.get_raw())

-        return ''.join(chunks)
+        return "".join(chunks)

     def assert_matches(self, test_case, doc):
         if not isinstance(doc, Line):
@@ -121,7 +127,7 @@ class BOLD:
         self.text = text

     def get_raw(self):
-        return '*{}*'.format(get_raw(self.text))
+        return "*{}*".format(get_raw(self.text))

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Bold))
@@ -133,29 +139,31 @@ class CODE:
         self.text = text

     def get_raw(self):
-        return '~{}~'.format(get_raw(self.text))
+        return "~{}~".format(get_raw(self.text))

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Code))
         test_case.assertEqual(self.text, other.contents)


 class ITALIC:
     def __init__(self, text):
         self.text = text

     def get_raw(self):
-        return '/{}/'.format(get_raw(self.text))
+        return "/{}/".format(get_raw(self.text))

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Italic))
         test_case.assertEqual(self.text, other.contents)


 class STRIKE:
     def __init__(self, text):
         self.text = text

     def get_raw(self):
-        return '+{}+'.format(get_raw(self.text))
+        return "+{}+".format(get_raw(self.text))

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Strike))
@@ -167,32 +175,54 @@ class UNDERLINED:
         self.text = text

     def get_raw(self):
-        return '_{}_'.format(get_raw(self.text))
+        return "_{}_".format(get_raw(self.text))

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Underlined))
         test_case.assertEqual(self.text, other.contents)


 class VERBATIM:
     def __init__(self, text):
         self.text = text

     def get_raw(self):
-        return '={}='.format(get_raw(self.text))
+        return "={}=".format(get_raw(self.text))

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, Verbatim))
         test_case.assertEqual(self.text, other.contents)


 class WEB_LINK:
     def __init__(self, text, link):
         self.text = text
         self.link = link

     def get_raw(self):
-        return '[[{}][{}]]'.format(self.link, self.text)
+        return "[[{}][{}]]".format(self.link, self.text)

     def assertEqual(self, test_case, other):
         test_case.assertTrue(isinstance(other, WebLink))
         test_case.assertEqual(self.text, other.contents)
         test_case.assertEqual(self.link, other.link)

+
+class Tokens:
+    BOLD_END = "*"
+    BOLD_START = "*"
+
+    VERBATIM_START = "="
+    VERBATIM_END = "="
+
+    ITALIC_START = "/"
+    ITALIC_END = "/"
+
+    STRIKE_START = "+"
+    STRIKE_END = "+"
+
+    UNDERLINED_START = "_"
+    UNDERLINED_END = "_"
+
+    CODE_START = "~"
+    CODE_END = "~"
Loading…
Reference in New Issue
Block a user