Add simple support for nested markup.
This commit is contained in:
parent
5b886e5e24
commit
f6de69fd90
6 changed files with 356 additions and 121 deletions
|
@ -109,6 +109,11 @@ def timestamp_to_string(ts):
|
|||
else:
|
||||
return '[{}]'.format(base)
|
||||
|
||||
def get_raw(doc):
    """Return the raw source text of ``doc``.

    A plain ``str`` is returned unchanged; any other object is asked for its
    text through its ``get_raw()`` method.
    """
    return doc if isinstance(doc, str) else doc.get_raw()
|
||||
|
||||
class Line:
|
||||
def __init__(self, linenum, contents):
|
||||
|
@ -125,73 +130,190 @@ class Line:
|
|||
return ''.join(rawchunks) + '\n'
|
||||
|
||||
class Text:
|
||||
def __init__(self, contents):
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
self.linenum = line
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return raw
|
||||
|
||||
class Bold:
|
||||
def __init__(self, contents):
|
||||
Marker = '*'
|
||||
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return f"*{raw}*"
|
||||
raw = ''.join(map(get_raw, self.contents))
|
||||
return f"{self.Marker}{raw}{self.Marker}"
|
||||
|
||||
class Code:
|
||||
def __init__(self, contents):
|
||||
Marker = '~'
|
||||
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return f"~{raw}~"
|
||||
raw = ''.join(map(get_raw, self.contents))
|
||||
return f"{self.Marker}{raw}{self.Marker}"
|
||||
|
||||
class Italic:
|
||||
def __init__(self, contents):
|
||||
Marker = '/'
|
||||
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return f"/{raw}/"
|
||||
raw = ''.join(map(get_raw, self.contents))
|
||||
return f"{self.Marker}{raw}{self.Marker}"
|
||||
|
||||
class Strike:
|
||||
def __init__(self, contents):
|
||||
Marker = '+'
|
||||
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return f"+{raw}+"
|
||||
raw = ''.join(map(get_raw, self.contents))
|
||||
return f"{self.Marker}{raw}{self.Marker}"
|
||||
|
||||
class Underlined:
|
||||
def __init__(self, contents):
|
||||
Marker = '_'
|
||||
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return f"_{raw}_"
|
||||
raw = ''.join(map(get_raw, self.contents))
|
||||
return f"{self.Marker}{raw}{self.Marker}"
|
||||
|
||||
class Verbatim:
|
||||
def __init__(self, contents):
|
||||
Marker = '='
|
||||
|
||||
def __init__(self, contents, line):
|
||||
self.contents = contents
|
||||
|
||||
def get_raw(self):
|
||||
raw = ''.join(self.contents)
|
||||
return f"={raw}="
|
||||
raw = ''.join(map(get_raw, self.contents))
|
||||
return f"{self.Marker}{raw}{self.Marker}"
|
||||
|
||||
|
||||
def is_pre(char: str) -> bool:
    """Tell whether ``char`` may immediately precede an opening marker.

    Non-string values are accepted and yield True; the tokenizer passes
    ``None`` as the "previous character" at the very start of the input.
    For strings the result is a membership test against newline, carriage
    return, tab, space, ``-``, ``(``, ``{``, ``'`` and ``"``.
    """
    if not isinstance(char, str):
        return True
    return char in '\n\r\t -({\'"'
|
||||
|
||||
def is_marker(char: str) -> bool:
    """Tell whether ``char`` is one of the emphasis marker characters.

    The markers are ``*``, ``=``, ``/``, ``+``, ``_`` and ``~``.

    Args:
        char: The value to classify. Non-string values (for example ``None``)
            are accepted and are never markers.

    Returns:
        True only when ``char`` is a single-character string that is one of
        the markers listed above.
    """
    if not isinstance(char, str):
        return False
    # ``x in "..."`` is a substring search for strings, so the empty string
    # and runs such as '*=' would otherwise be reported as markers. Require
    # exactly one character before testing membership.
    return len(char) == 1 and char in '*=/+_~'
|
||||
|
||||
def is_border(char: str) -> bool:
    """Tell whether ``char`` can sit directly inside a pair of markers.

    A string qualifies when it is not found in the whitespace set (newline,
    carriage return, tab, space). Non-string values yield False.
    """
    if not isinstance(char, str):
        return False
    return char not in '\n\r\t '
|
||||
|
||||
def is_body(char: str) -> bool:
    """Tell whether ``char`` counts as body text: True for any ``str``.

    Non-string values (such as ``None``) yield False.
    """
    return isinstance(char, str)
|
||||
|
||||
def is_post(char: str) -> bool:
    """Tell whether ``char`` is one of the "post" punctuation characters.

    The accepted characters are ``-``, ``.``, ``,``, ``;``, ``:``, ``!``,
    ``?``, ``'``, ``)``, ``}``, ``[`` and ``"``.

    Args:
        char: The value to classify. Non-string values (for example ``None``)
            are accepted and never qualify.

    Returns:
        True only when ``char`` is a single-character string that is one of
        the characters listed above.
    """
    if not isinstance(char, str):
        return False
    # ``x in "..."`` is a substring search for strings, so the empty string
    # and runs such as '.,' would otherwise qualify. Require exactly one
    # character before testing membership.
    return len(char) == 1 and char in '-.,;:!?\')}["'
|
||||
|
||||
# Token kinds emitted by tokenize_contents() as the first item of each
# (kind, value) tuple: a run of plain text, an opening marker, a closing marker.
TOKEN_TYPE_TEXT, TOKEN_TYPE_OPEN_MARKER, TOKEN_TYPE_CLOSE_MARKER = range(3)
|
||||
|
||||
def _find_closing_marker(contents: str, opening: int, marker: str):
    """Return the index of the marker that closes the one at ``opening``.

    A closing marker is a character equal to ``marker`` whose preceding
    character satisfies ``is_border``. The scan gives up (returns ``None``)
    when it meets a newline that directly follows another newline, or when
    the end of ``contents`` is reached without a match.
    """
    text_in_line = True
    # Start right AFTER the opening marker. Starting on the marker itself
    # lets it act as its own border character, so a doubled marker such as
    # "**" would be tokenized as an open/close pair with nothing in between.
    for j in range(opening + 1, len(contents) - 1):
        current = contents[j]
        if current == '\n':
            if not text_in_line:
                return None
            text_in_line = False
        elif is_border(current) and contents[j + 1] == marker:
            return j + 1
        else:
            text_in_line |= is_body(current)
    return None


def tokenize_contents(contents: str):
    """Split ``contents`` into text and emphasis-marker tokens.

    A character becomes an opening marker when all of these hold: it
    satisfies ``is_marker``; the previous character satisfies ``is_pre``
    (the start of the input counts, because ``None`` is passed); the next
    character satisfies ``is_border``; and a matching closing marker is found
    further on. The position of that closing marker is remembered so it is
    emitted as a closing token when the scan reaches it. Every other
    character is accumulated into text runs.

    Args:
        contents: The text to tokenize.

    Returns:
        A list of ``(token_type, value)`` tuples, where ``token_type`` is one
        of ``TOKEN_TYPE_TEXT``, ``TOKEN_TYPE_OPEN_MARKER`` or
        ``TOKEN_TYPE_CLOSE_MARKER`` and ``value`` is the text run or the
        marker character.
    """
    tokens = []
    text = []          # characters of the text run being accumulated
    closes = set()     # indices already claimed as closing markers
    last_char = None   # None marks the start of the input for is_pre()

    def flush_text():
        # Emit the pending text run (if any) before a marker token.
        if text:
            tokens.append((TOKEN_TYPE_TEXT, ''.join(text)))
            text.clear()

    for i, char in enumerate(contents):
        if i in closes:
            flush_text()
            tokens.append((TOKEN_TYPE_CLOSE_MARKER, char))
        else:
            close_at = None
            if (is_marker(char)
                    and is_pre(last_char)
                    and i + 1 < len(contents)
                    and is_border(contents[i + 1])):
                # Only accept the marker if it is closed later on.
                close_at = _find_closing_marker(contents, i, char)
            if close_at is None:
                text.append(char)
            else:
                closes.add(close_at)
                flush_text()
                tokens.append((TOKEN_TYPE_OPEN_MARKER, char))
        last_char = char

    flush_text()
    return tokens
|
||||
|
||||
|
||||
def parse_contents(raw_contents:List[RawLine]):
|
||||
NO_MODE = 0
|
||||
BOLD_MODE = 1
|
||||
CODE_MODE = 2
|
||||
ITALIC_MODE = 3
|
||||
STRIKE_MODE = 4
|
||||
UNDERLINED_MODE = 5
|
||||
VERBATIM_MODE = 6
|
||||
NO_MODE = 0b0
|
||||
BOLD_MODE = 0b1
|
||||
CODE_MODE = 0b10
|
||||
ITALIC_MODE = 0b100
|
||||
STRIKE_MODE = 0b1000
|
||||
UNDERLINED_MODE = 0b10000
|
||||
VERBATIM_MODE = 0b100000
|
||||
|
||||
MODE_CLASS = {
|
||||
NO_MODE: Line,
|
||||
MARKERS = {
|
||||
'*': BOLD_MODE,
|
||||
'~': CODE_MODE,
|
||||
'/': ITALIC_MODE,
|
||||
'+': STRIKE_MODE,
|
||||
'_': UNDERLINED_MODE,
|
||||
'=': VERBATIM_MODE,
|
||||
}
|
||||
MODES = (
|
||||
(BOLD_MODE, Bold),
|
||||
(CODE_MODE, Code),
|
||||
(ITALIC_MODE, Italic),
|
||||
(STRIKE_MODE, Strike),
|
||||
(UNDERLINED_MODE, Underlined),
|
||||
(VERBATIM_MODE, Verbatim),
|
||||
)
|
||||
_MODES = {
|
||||
BOLD_MODE: Bold,
|
||||
CODE_MODE: Code,
|
||||
ITALIC_MODE: Italic,
|
||||
|
@ -213,68 +335,80 @@ def parse_contents(raw_contents:List[RawLine]):
|
|||
contents_buff.append(line.line)
|
||||
|
||||
contents = '\n'.join(contents_buff)
|
||||
tokens = tokenize_contents(contents)
|
||||
|
||||
for c in contents:
|
||||
if mode == NO_MODE:
|
||||
if escaped:
|
||||
chunk.append(c)
|
||||
escaped = False
|
||||
# Use tokens to tag chunks of text with its container type
|
||||
for (tok_type, tok_val) in tokens:
|
||||
if tok_type == TOKEN_TYPE_TEXT:
|
||||
chunks.append((mode, tok_val))
|
||||
elif tok_type == TOKEN_TYPE_OPEN_MARKER:
|
||||
mode = mode | MARKERS[tok_val]
|
||||
elif tok_type == TOKEN_TYPE_OPEN_MARKER:
|
||||
mode = mode ^ MARKERS[tok_val]
|
||||
|
||||
elif c == '\\':
|
||||
escaped = True
|
||||
elif c == '*':
|
||||
mode = BOLD_MODE
|
||||
elif c == '~':
|
||||
mode = CODE_MODE
|
||||
elif c == '/':
|
||||
mode = ITALIC_MODE
|
||||
elif c == '+':
|
||||
mode = STRIKE_MODE
|
||||
elif c == '_':
|
||||
mode = UNDERLINED_MODE
|
||||
elif c == '=':
|
||||
mode = VERBATIM_MODE
|
||||
elif c == '\n':
|
||||
chunks.append(Line(linenum, inline + [Text(chunk)]))
|
||||
chunk = []
|
||||
inline = []
|
||||
# Convert those chunks to a tree
|
||||
def tree_for_tag(tag, in_mode):
|
||||
tree = []
|
||||
for (mask, mode) in MODES:
|
||||
if (mask & tag) and not (mask & in_mode):
|
||||
tree.append(mode)
|
||||
print(tree)
|
||||
if len(tree) == 0:
|
||||
return Text
|
||||
|
||||
|
||||
if len(raw_contents) > 0:
|
||||
current_line = raw_contents[0].linenum
|
||||
|
||||
# tree = []
|
||||
# pos = []
|
||||
# print('\n'.join(map(str, chunks)))
|
||||
# for (tag, chunk) in chunks:
|
||||
# if pos == []:
|
||||
# tree.append(tree_for_tag(tag, NO_MODE)(chunk, line=current_line))
|
||||
# pos.append(tree[-1])
|
||||
# else:
|
||||
# raise NotImplementedError()
|
||||
|
||||
# current_line += chunk.count('\n')
|
||||
|
||||
|
||||
tree = []
|
||||
mode_tree = []
|
||||
contents = []
|
||||
# Use tokens to tag chunks of text with its container type
|
||||
for (tok_type, tok_val) in tokens:
|
||||
if tok_type == TOKEN_TYPE_TEXT:
|
||||
if len(mode_tree) == 0:
|
||||
tree.append(Text(tok_val, current_line))
|
||||
else:
|
||||
chunk.append(c)
|
||||
contents[-1].append(tok_val)
|
||||
|
||||
if mode != NO_MODE:
|
||||
inline.append(Text([''.join(chunk)]))
|
||||
chunk = []
|
||||
else:
|
||||
if escaped:
|
||||
chunk.append(c)
|
||||
escaped = False
|
||||
current_line += chunk.count('\n')
|
||||
|
||||
was_mode = mode
|
||||
if mode == BOLD_MODE and c == '*':
|
||||
mode = NO_MODE
|
||||
elif mode == CODE_MODE and c == '~':
|
||||
mode = NO_MODE
|
||||
elif mode == ITALIC_MODE and c == '/':
|
||||
mode = NO_MODE
|
||||
elif mode == STRIKE_MODE and c == '+':
|
||||
mode = NO_MODE
|
||||
elif mode == UNDERLINED_MODE and c == '_':
|
||||
mode = NO_MODE
|
||||
elif mode == VERBATIM_MODE and c == '=':
|
||||
mode = NO_MODE
|
||||
elif c == '\n':
|
||||
raise NotImplementedError("[{} | {}]".format(c, chunk))
|
||||
elif tok_type == TOKEN_TYPE_OPEN_MARKER:
|
||||
mode_tree.append(_MODES[MARKERS[tok_val]])
|
||||
contents.append([])
|
||||
|
||||
elif tok_type == TOKEN_TYPE_CLOSE_MARKER:
|
||||
mode = _MODES[MARKERS[tok_val]]
|
||||
matching_mode = mode_tree.pop()
|
||||
assert mode == matching_mode
|
||||
value = mode(contents.pop(), current_line)
|
||||
current_line += chunk.count('\n')
|
||||
|
||||
if len(mode_tree) == 0: # Closed branch of tree
|
||||
tree.append(value)
|
||||
else:
|
||||
chunk.append(c)
|
||||
print("{} <- {}".format(mode_tree[-1], mode))
|
||||
contents[-1].append(value)
|
||||
|
||||
if mode == NO_MODE:
|
||||
inline.append(MODE_CLASS[was_mode](''.join(chunk)))
|
||||
chunk = []
|
||||
current_line += chunk.count('\n')
|
||||
|
||||
assert(len(chunk) == 0)
|
||||
assert(len(inline) == 0)
|
||||
|
||||
return chunks
|
||||
if len(tree) > 3:
|
||||
print("L", len(tree))
|
||||
print("F:", tree)
|
||||
return tree
|
||||
|
||||
def parse_headline(hl) -> Headline:
|
||||
stars = hl['orig'].group('stars')
|
||||
|
@ -353,8 +487,8 @@ class OrgDom:
|
|||
def dump_contents(self, raw):
|
||||
if isinstance(raw, RawLine):
|
||||
return (raw.linenum, raw.line)
|
||||
else:
|
||||
return (raw.linenum, raw.get_raw())
|
||||
|
||||
return (raw.linenum, raw.get_raw())
|
||||
|
||||
def dump_structural(self, structural: Tuple):
    """Return the first two items of ``structural`` as a new 2-tuple."""
    first = structural[0]
    second = structural[1]
    return (first, second)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from .org_dom import Headline, Line, RawLine
|
||||
|
||||
from .org_dom import Headline, Line, RawLine, Text, Bold, Code, Italic, Strike, Underlined, Verbatim
|
||||
|
||||
def get_hl_raw_contents(doc: Headline) -> str:
|
||||
lines = []
|
||||
|
@ -18,6 +17,8 @@ def get_rawline_contents(doc: RawLine) -> str:
|
|||
def get_span_contents(doc: Line) -> str:
    """Return the raw text of ``doc`` as produced by its ``get_raw()`` method."""
    raw = doc.get_raw()
    return raw
|
||||
|
||||
def get_text_contents(doc: Text) -> str:
    """Return the raw text of ``doc`` as produced by its ``get_raw()`` method."""
    raw = doc.get_raw()
    return raw
|
||||
|
||||
def get_raw_contents(doc) -> str:
|
||||
if isinstance(doc, Headline):
|
||||
|
@ -28,4 +29,7 @@ def get_raw_contents(doc) -> str:
|
|||
return get_span_contents(doc)
|
||||
if isinstance(doc, list):
|
||||
return ''.join([get_raw_contents(chunk) for chunk in doc])
|
||||
if isinstance(doc, (Text, Bold, Code, Italic, Strike, Underlined, Verbatim)):
|
||||
return doc.get_raw()
|
||||
print('Unhandled type: ' + str(doc))
|
||||
raise NotImplementedError('Unhandled type: ' + str(doc))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue