forked from kenkeiras/org-rw
Pass markup tests.
This commit is contained in:
parent
0dab7e4703
commit
5b886e5e24
@ -109,28 +109,187 @@ def timestamp_to_string(ts):
|
|||||||
else:
|
else:
|
||||||
return '[{}]'.format(base)
|
return '[{}]'.format(base)
|
||||||
|
|
||||||
|
|
||||||
|
class Line:
    """One parsed content line: a line number plus its ordered chunks.

    ``contents`` is an iterable whose items are either plain strings or
    objects exposing ``get_raw()``.
    """

    def __init__(self, linenum, contents):
        self.linenum = linenum
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.linenum!r}, {self.contents!r})"

    def get_raw(self):
        """Return the line's raw text, always terminated by a newline."""
        return ''.join(
            chunk if isinstance(chunk, str) else chunk.get_raw()
            for chunk in self.contents
        ) + '\n'
|
||||||
|
|
||||||
|
class Text:
    """Plain text chunk with no markup.

    ``contents`` is a string or an iterable of strings; ``get_raw()``
    concatenates it unchanged.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the concatenated text."""
        return ''.join(self.contents)
|
||||||
|
|
||||||
|
class Bold:
    """Bold markup chunk; its raw form wraps the text in ``*``.

    ``contents`` is a string or an iterable of strings.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the text surrounded by ``*`` delimiters."""
        raw = ''.join(self.contents)
        return f"*{raw}*"
|
||||||
|
|
||||||
|
class Code:
    """Code markup chunk; its raw form wraps the text in ``~``.

    ``contents`` is a string or an iterable of strings.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the text surrounded by ``~`` delimiters."""
        raw = ''.join(self.contents)
        return f"~{raw}~"
|
||||||
|
|
||||||
|
class Italic:
    """Italic markup chunk; its raw form wraps the text in ``/``.

    ``contents`` is a string or an iterable of strings.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the text surrounded by ``/`` delimiters."""
        raw = ''.join(self.contents)
        return f"/{raw}/"
|
||||||
|
|
||||||
|
class Strike:
    """Strike-through markup chunk; its raw form wraps the text in ``+``.

    ``contents`` is a string or an iterable of strings.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the text surrounded by ``+`` delimiters."""
        raw = ''.join(self.contents)
        return f"+{raw}+"
|
||||||
|
|
||||||
|
class Underlined:
    """Underlined markup chunk; its raw form wraps the text in ``_``.

    ``contents`` is a string or an iterable of strings.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the text surrounded by ``_`` delimiters."""
        raw = ''.join(self.contents)
        return f"_{raw}_"
|
||||||
|
|
||||||
|
class Verbatim:
    """Verbatim markup chunk; its raw form wraps the text in ``=``.

    ``contents`` is a string or an iterable of strings.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        # Readable output for debugging prints of parsed contents.
        return f"{type(self).__name__}({self.contents!r})"

    def get_raw(self):
        """Return the text surrounded by ``=`` delimiters."""
        raw = ''.join(self.contents)
        return f"={raw}="
|
||||||
|
|
||||||
|
|
||||||
|
def parse_contents(raw_contents: List[RawLine]):
    """Parse raw content lines into ``Line`` objects with inline markup.

    The lines are scanned character by character as if joined with
    newlines. Text outside markup becomes ``Text`` chunks; text between
    a pair of ``*``, ``~``, ``/``, ``+``, ``_`` or ``=`` delimiters
    becomes ``Bold``, ``Code``, ``Italic``, ``Strike``, ``Underlined`` or
    ``Verbatim`` respectively. Outside markup a backslash makes the next
    character literal (the backslash itself is dropped).

    Each returned ``Line`` carries the line number of the raw line it
    started on. A final line with no text and no markup is not emitted.
    An empty ``raw_contents`` yields an empty list.

    NOTE(review): every occurrence of a delimiter character opens markup,
    even in the middle of a word; stricter rules are not implemented here.

    Args:
        raw_contents: items exposing ``.line`` (text without the trailing
            newline) and ``.linenum``.

    Returns:
        A list of ``Line`` objects.

    Raises:
        NotImplementedError: if markup is still open at a line break or
            at the end of the contents.
    """
    # Delimiter character -> class used for the text it encloses.
    MARKUP_CLASS = {
        '*': Bold,
        '~': Code,
        '/': Italic,
        '+': Strike,
        '_': Underlined,
        '=': Verbatim,
    }

    open_delimiter = None  # delimiter of the markup being read, if any
    escaped = False
    chunk = []   # characters of the chunk being accumulated
    inline = []  # finished chunks of the line being built
    chunks = []  # finished lines
    start_linenum = None  # line number where the current Line began

    last_index = len(raw_contents) - 1
    for index, raw in enumerate(raw_contents):
        # Feed the same character stream as '\n'.join(...) would, one raw
        # line at a time, so the current line number is always known.
        text = raw.line if index == last_index else raw.line + '\n'
        for c in text:
            if start_linenum is None:
                start_linenum = raw.linenum

            if open_delimiter is None:
                if escaped:
                    chunk.append(c)
                    escaped = False
                elif c == '\\':
                    escaped = True
                elif c in MARKUP_CLASS:
                    # Markup opens: close the plain text gathered so far.
                    open_delimiter = c
                    inline.append(Text([''.join(chunk)]))
                    chunk = []
                elif c == '\n':
                    chunks.append(Line(start_linenum, inline + [Text(chunk)]))
                    chunk = []
                    inline = []
                    start_linenum = None
                else:
                    chunk.append(c)
            elif c == open_delimiter:
                inline.append(MARKUP_CLASS[open_delimiter](''.join(chunk)))
                chunk = []
                open_delimiter = None
            elif c == '\n':
                raise NotImplementedError("[{} | {}]".format(c, chunk))
            else:
                chunk.append(c)

    if open_delimiter is not None:
        raise NotImplementedError(
            "Unterminated {} markup at end of contents: {}".format(
                open_delimiter, chunk))

    if escaped:
        # Trailing backslash with nothing left to escape: keep it as text.
        chunk.append('\\')

    if chunk or inline:
        # The last raw line has no newline after it, so flush it here.
        chunks.append(Line(start_linenum, inline + [Text(chunk)]))

    return chunks
|
||||||
|
|
||||||
def parse_headline(hl) -> Headline:
|
def parse_headline(hl) -> Headline:
|
||||||
# 'linenum': linenum,
|
|
||||||
# 'orig': match,
|
|
||||||
# 'title': match.group('line'),
|
|
||||||
# 'contents': [],
|
|
||||||
# 'children': [],
|
|
||||||
# 'keywords': [],
|
|
||||||
# 'properties': [],
|
|
||||||
# 'structural': [],
|
|
||||||
# HEADLINE_RE = re.compile(r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$')
|
|
||||||
stars = hl['orig'].group('stars')
|
stars = hl['orig'].group('stars')
|
||||||
depth = len(stars)
|
depth = len(stars)
|
||||||
|
|
||||||
# TODO: Parse line for priority, cookies and tags
|
# TODO: Parse line for priority, cookies and tags
|
||||||
line = hl['orig'].group('line')
|
line = hl['orig'].group('line')
|
||||||
title = line.strip()
|
title = line.strip()
|
||||||
|
contents = parse_contents(hl['contents'])
|
||||||
|
|
||||||
return Headline(start_line=hl['linenum'],
|
return Headline(start_line=hl['linenum'],
|
||||||
depth=depth,
|
depth=depth,
|
||||||
orig=hl['orig'],
|
orig=hl['orig'],
|
||||||
title=title,
|
title=title,
|
||||||
contents=hl['contents'],
|
contents=contents,
|
||||||
children=[parse_headline(child) for child in hl['children']],
|
children=[parse_headline(child) for child in hl['children']],
|
||||||
keywords=hl['keywords'],
|
keywords=hl['keywords'],
|
||||||
properties=hl['properties'],
|
properties=hl['properties'],
|
||||||
@ -191,8 +350,11 @@ class OrgDom:
|
|||||||
value=value,
|
value=value,
|
||||||
))
|
))
|
||||||
|
|
||||||
def dump_contents(self, raw):
    """Return a ``(linenum, text)`` pair for one content entry.

    For a ``RawLine`` the text is ``raw.line``; for any other entry it is
    the result of ``raw.get_raw()``.
    """
    text = raw.line if isinstance(raw, RawLine) else raw.get_raw()
    return (raw.linenum, text)
|
||||||
|
|
||||||
def dump_structural(self, structural: Tuple):
|
def dump_structural(self, structural: Tuple):
|
||||||
return (structural[0], structural[1])
|
return (structural[0], structural[1])
|
||||||
@ -227,18 +389,21 @@ class OrgDom:
|
|||||||
|
|
||||||
if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
|
if ltype == PROPERTIES_T and last_type not in (STRUCTURAL_T, PROPERTIES_T):
|
||||||
# No structural opening
|
# No structural opening
|
||||||
structured_lines.append(' ' * content.index(':') + ':PROPERTIES:')
|
structured_lines.append(' ' * content.index(':') + ':PROPERTIES:\n')
|
||||||
logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
|
logging.warning("Added structural: ".format(line[1][0], structured_lines[-1].strip()))
|
||||||
elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
|
elif ltype not in (STRUCTURAL_T, PROPERTIES_T) and last_type == PROPERTIES_T:
|
||||||
# No structural closing
|
# No structural closing
|
||||||
last_line = lines[i - 1][1][1]
|
last_line = lines[i - 1][1][1]
|
||||||
structured_lines.append(' ' * last_line.index(':') + ':END:')
|
structured_lines.append(' ' * last_line.index(':') + ':END:\n')
|
||||||
logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
|
logging.warning("Added structural:{}: {}".format(line[1][0], structured_lines[-1].strip()))
|
||||||
|
|
||||||
|
elif ltype != CONTENT_T:
|
||||||
|
content = content + '\n'
|
||||||
|
|
||||||
last_type = ltype
|
last_type = ltype
|
||||||
structured_lines.append(content)
|
structured_lines.append(content)
|
||||||
|
|
||||||
yield from structured_lines
|
yield ''.join(structured_lines)
|
||||||
|
|
||||||
for child in headline.children:
|
for child in headline.children:
|
||||||
yield from self.dump_headline(child)
|
yield from self.dump_headline(child)
|
||||||
@ -372,5 +537,7 @@ def load(f, environment=BASE_ENVIRONMENT, extra_cautious=False):
|
|||||||
|
|
||||||
|
|
||||||
def dumps(doc):
    """Serialize *doc* to a string.

    Joins the pieces produced by ``doc.dump()`` with newlines and returns
    the result. Nothing is written to standard output.
    """
    return '\n'.join(doc.dump())
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from .org_dom import Headline, RawLine
|
from .org_dom import Headline, Line, RawLine
|
||||||
|
|
||||||
|
|
||||||
def get_hl_raw_contents(doc: Headline) -> str:
|
def get_hl_raw_contents(doc: Headline) -> str:
|
||||||
@ -7,16 +7,25 @@ def get_hl_raw_contents(doc: Headline) -> str:
|
|||||||
for content in doc.contents:
|
for content in doc.contents:
|
||||||
lines.append(get_raw_contents(content))
|
lines.append(get_raw_contents(content))
|
||||||
|
|
||||||
return '\n'.join(lines)
|
raw = ''.join(lines)
|
||||||
|
return raw
|
||||||
|
|
||||||
|
|
||||||
def get_rawline_contents(doc: RawLine) -> str:
    """Return the text held in the ``line`` attribute of a ``RawLine``."""
    return doc.line
|
||||||
|
|
||||||
|
|
||||||
|
def get_span_contents(doc: Line) -> str:
    """Return the raw text of a parsed ``Line`` via its ``get_raw()``."""
    return doc.get_raw()
|
||||||
|
|
||||||
|
|
||||||
def get_raw_contents(doc) -> str:
    """Return the raw text of *doc*, dispatching on its type.

    Handles ``Headline``, ``RawLine`` and ``Line`` objects, and lists of
    any of these (whose raw texts are concatenated).

    Raises:
        NotImplementedError: if *doc* is of any other type.
    """
    # Checked in order; the first matching type wins.
    handlers = (
        (Headline, get_hl_raw_contents),
        (RawLine, get_rawline_contents),
        (Line, get_span_contents),
    )
    for kind, handler in handlers:
        if isinstance(doc, kind):
            return handler(doc)

    if isinstance(doc, list):
        return ''.join(map(get_raw_contents, doc))

    raise NotImplementedError('Unhandled type: ' + str(doc))
|
||||||
|
@ -76,7 +76,6 @@ class TestSerde(unittest.TestCase):
|
|||||||
SPAN(""),
|
SPAN(""),
|
||||||
SPAN(" This is a ", CODE("code phrase"),
|
SPAN(" This is a ", CODE("code phrase"),
|
||||||
"."),
|
"."),
|
||||||
SPAN(""),
|
|
||||||
])))
|
])))
|
||||||
|
|
||||||
ex.assert_matches(self, doc)
|
ex.assert_matches(self, doc)
|
||||||
|
@ -2,7 +2,7 @@ import collections
|
|||||||
import unittest
|
import unittest
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from org_dom import get_raw_contents
|
from org_dom import Line, Text, Bold, Code, Italic, Strike, Underlined, Verbatim, get_raw_contents
|
||||||
|
|
||||||
|
|
||||||
def timestamp_to_datetime(ts):
|
def timestamp_to_datetime(ts):
|
||||||
@ -68,10 +68,12 @@ class HL:
|
|||||||
if isinstance(self.content, str):
|
if isinstance(self.content, str):
|
||||||
test_case.assertEqual(get_raw_contents(doc), self.content)
|
test_case.assertEqual(get_raw_contents(doc), self.content)
|
||||||
else:
|
else:
|
||||||
|
if len(doc.contents) != len(self.content):
|
||||||
|
print("Contents:", doc.contents)
|
||||||
|
print("Expected:", self.content)
|
||||||
test_case.assertEqual(len(doc.contents), len(self.content))
|
test_case.assertEqual(len(doc.contents), len(self.content))
|
||||||
for i, content in enumerate(self.content):
|
for i, content in enumerate(self.content):
|
||||||
test_case.assertEqual(get_raw_contents(doc.contents[i]),
|
content.assert_matches(test_case, doc.contents[i])
|
||||||
content.to_raw())
|
|
||||||
|
|
||||||
# Check children
|
# Check children
|
||||||
if self.children is None:
|
if self.children is None:
|
||||||
@ -99,6 +101,16 @@ class SPAN:
|
|||||||
|
|
||||||
return ''.join(chunks)
|
return ''.join(chunks)
|
||||||
|
|
||||||
|
def assert_matches(self, test_case, doc):
    """Assert that the parsed line *doc* matches this expected span.

    *doc* must be a ``Line``. Each expected section is compared with the
    chunk at the same position in ``doc.contents``: a string section must
    correspond to a ``Text`` chunk with the same raw text, any other
    section is checked through its own ``assertEqual(test_case, chunk)``.

    Chunks in *doc* beyond the expected sections are not checked.
    """
    # Fail the test on a wrong type; a returned flag would be ignored.
    test_case.assertIsInstance(doc, Line)
    # Report missing chunks as a test failure rather than an IndexError.
    test_case.assertGreaterEqual(len(doc.contents), len(self.contents))

    for i, section in enumerate(self.contents):
        chunk = doc.contents[i]
        if isinstance(section, str):
            test_case.assertIsInstance(chunk, Text)
            test_case.assertEqual(section, chunk.get_raw())
        else:
            section.assertEqual(test_case, chunk)
|
||||||
|
|
||||||
|
|
||||||
class BOLD:
|
class BOLD:
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
@ -107,6 +119,10 @@ class BOLD:
|
|||||||
def to_raw(self):
|
def to_raw(self):
|
||||||
return '*{}*'.format(self.text)
|
return '*{}*'.format(self.text)
|
||||||
|
|
||||||
|
def assertEqual(self, test_case, other):
    """Assert that *other* is a ``Bold`` chunk whose contents equal this text."""
    # assertIsInstance reports the offending object on failure.
    test_case.assertIsInstance(other, Bold)
    test_case.assertEqual(self.text, other.contents)
|
||||||
|
|
||||||
|
|
||||||
class CODE:
|
class CODE:
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
@ -115,6 +131,9 @@ class CODE:
|
|||||||
def to_raw(self):
|
def to_raw(self):
|
||||||
return '~{}~'.format(self.text)
|
return '~{}~'.format(self.text)
|
||||||
|
|
||||||
|
def assertEqual(self, test_case, other):
    """Assert that *other* is a ``Code`` chunk whose contents equal this text."""
    # assertIsInstance reports the offending object on failure.
    test_case.assertIsInstance(other, Code)
    test_case.assertEqual(self.text, other.contents)
|
||||||
|
|
||||||
class ITALIC:
|
class ITALIC:
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
@ -123,6 +142,9 @@ class ITALIC:
|
|||||||
def to_raw(self):
|
def to_raw(self):
|
||||||
return '/{}/'.format(self.text)
|
return '/{}/'.format(self.text)
|
||||||
|
|
||||||
|
def assertEqual(self, test_case, other):
    """Assert that *other* is an ``Italic`` chunk whose contents equal this text."""
    # assertIsInstance reports the offending object on failure.
    test_case.assertIsInstance(other, Italic)
    test_case.assertEqual(self.text, other.contents)
|
||||||
|
|
||||||
class STRIKE:
|
class STRIKE:
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
@ -131,6 +153,10 @@ class STRIKE:
|
|||||||
def to_raw(self):
|
def to_raw(self):
|
||||||
return '+{}+'.format(self.text)
|
return '+{}+'.format(self.text)
|
||||||
|
|
||||||
|
def assertEqual(self, test_case, other):
    """Assert that *other* is a ``Strike`` chunk whose contents equal this text."""
    # assertIsInstance reports the offending object on failure.
    test_case.assertIsInstance(other, Strike)
    test_case.assertEqual(self.text, other.contents)
|
||||||
|
|
||||||
|
|
||||||
class UNDERLINED:
|
class UNDERLINED:
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
@ -139,6 +165,9 @@ class UNDERLINED:
|
|||||||
def to_raw(self):
|
def to_raw(self):
|
||||||
return '_{}_'.format(self.text)
|
return '_{}_'.format(self.text)
|
||||||
|
|
||||||
|
def assertEqual(self, test_case, other):
    """Assert that *other* is an ``Underlined`` chunk whose contents equal this text."""
    # assertIsInstance reports the offending object on failure.
    test_case.assertIsInstance(other, Underlined)
    test_case.assertEqual(self.text, other.contents)
|
||||||
|
|
||||||
class VERBATIM:
|
class VERBATIM:
|
||||||
def __init__(self, text):
|
def __init__(self, text):
|
||||||
@ -146,3 +175,7 @@ class VERBATIM:
|
|||||||
|
|
||||||
def to_raw(self):
|
def to_raw(self):
|
||||||
return '={}='.format(self.text)
|
return '={}='.format(self.text)
|
||||||
|
|
||||||
|
def assertEqual(self, test_case, other):
    """Assert that *other* is a ``Verbatim`` chunk whose contents equal this text."""
    # assertIsInstance reports the offending object on failure.
    test_case.assertIsInstance(other, Verbatim)
    test_case.assertEqual(self.text, other.contents)
|
||||||
|
Loading…
Reference in New Issue
Block a user