Apply autoformatting with black.

This commit is contained in:
Sergio Martínez Portela 2020-11-26 23:44:56 +01:00
parent d5f8d76aeb
commit 1d71d1a3c3
5 changed files with 302 additions and 168 deletions

View File

@ -63,6 +63,7 @@ INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))
# BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
# r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
def get_tokens(value):
if isinstance(value, Text):
return value.contents
@ -70,6 +71,7 @@ def get_tokens(value):
return [value.line]
raise Exception("Unknown how to get tokens from: {}".format(value))
def get_links_from_content(content):
in_link = False
in_description = False
@ -85,7 +87,7 @@ def get_links_from_content(content):
elif tok.tok_type == LinkTokenType.CLOSE:
in_link = False
in_description = False
yield Link(''.join(link_value), ''.join(link_description))
yield Link("".join(link_value), "".join(link_description))
link_value = []
link_description = []
elif isinstance(tok, str) and in_link:
@ -94,8 +96,25 @@ def get_links_from_content(content):
else:
link_value.append(tok)
class Headline:
def __init__(self, start_line, depth, orig, properties, keywords, priority_start, priority, title_start, title, tags_start, tags, contents, children, structural):
def __init__(
self,
start_line,
depth,
orig,
properties,
keywords,
priority_start,
priority,
title_start,
title,
tags_start,
tags,
contents,
children,
structural,
):
self.start_line = start_line
self.depth = depth
self.orig = orig
@ -115,6 +134,7 @@ class Headline:
for content in self.contents:
yield from get_links_from_content(content)
RawLine = collections.namedtuple("RawLine", ("linenum", "line"))
Keyword = collections.namedtuple(
"Keyword", ("linenum", "match", "key", "value", "options")
@ -140,6 +160,7 @@ class MarkerType(Enum):
UNDERLINED_MODE = 0b10000
VERBATIM_MODE = 0b100000
MARKERS = {
"*": MarkerType.BOLD_MODE,
"~": MarkerType.CODE_MODE,
@ -157,14 +178,17 @@ for tok, mode in MARKERS.items():
MarkerToken = collections.namedtuple("MarkerToken", ("closing", "tok_type"))
LinkToken = collections.namedtuple("LinkToken", ("tok_type"))
class LinkTokenType(Enum):
OPEN_LINK = 3
OPEN_DESCRIPTION = 5
CLOSE = 4
BEGIN_PROPERTIES = "OPEN_PROPERTIES"
END_PROPERTIES = "CLOSE_PROPERTIES"
def token_from_type(tok_type):
return ModeToMarker[tok_type]
@ -258,9 +282,9 @@ class Link:
def get_raw(self):
if self.description:
return '[[{}][{}]]'.format(self.value, self.description)
return "[[{}][{}]]".format(self.value, self.description)
else:
return '[[{}]]'.format(self.value)
return "[[{}]]".format(self.value)
class Text:
@ -278,16 +302,16 @@ class Text:
contents.append(chunk)
elif isinstance(chunk, LinkToken):
if chunk.tok_type == LinkTokenType.OPEN_LINK:
contents.append('[[')
contents.append("[[")
elif chunk.tok_type == LinkTokenType.OPEN_DESCRIPTION:
contents.append('][')
contents.append("][")
else:
assert chunk.tok_type == LinkTokenType.CLOSE
contents.append(']]')
contents.append("]]")
else:
assert isinstance(chunk, MarkerToken)
contents.append(token_from_type(chunk.tok_type))
return ''.join(contents)
return "".join(contents)
class Bold:
@ -417,62 +441,67 @@ def tokenize_contents(contents: str):
tokens.append((TOKEN_TYPE_TEXT, "".join(text)))
text = []
cursor = enumerate(contents)
for i, char in cursor:
has_changed = False
# Possible link opening
if char == '[':
if (len(contents) > i + 3
if char == "[":
if (
len(contents) > i + 3
# At least 3 more characters are needed to open and close a link
and contents[i + 1] == '['):
close = contents.find(']', i)
and contents[i + 1] == "["
):
close = contents.find("]", i)
if close != -1 and contents[close + 1] == ']':
if close != -1 and contents[close + 1] == "]":
# Link with no description
cut_string()
in_link = True
tokens.append((TOKEN_TYPE_OPEN_LINK, None))
assert '[' == (next(cursor)[1])
assert "[" == (next(cursor)[1])
last_link_start = i
continue
if close != -1 and contents[close + 1] == '[':
if close != -1 and contents[close + 1] == "[":
# Link with description?
close = contents.find(']', close + 1)
if close != -1 and contents[close + 1] == ']':
close = contents.find("]", close + 1)
if close != -1 and contents[close + 1] == "]":
# No match here means this is not an Org link
cut_string()
in_link = True
tokens.append((TOKEN_TYPE_OPEN_LINK, None))
assert '[' == (next(cursor)[1])
assert "[" == (next(cursor)[1])
last_link_start = i
continue
# Possible link close or open of description
if char == ']' and in_link:
if contents[i + 1] == ']':
if char == "]" and in_link:
if contents[i + 1] == "]":
cut_string()
tokens.append((TOKEN_TYPE_CLOSE_LINK, None))
assert ']' == (next(cursor)[1])
assert "]" == (next(cursor)[1])
in_link = False
in_link_description = False
continue
if contents[i + 1] == '[' and not in_link_description:
if contents[i + 1] == "[" and not in_link_description:
cut_string()
tokens.append((TOKEN_TYPE_OPEN_DESCRIPTION, None))
assert '[' == (next(cursor)[1])
assert "[" == (next(cursor)[1])
continue
raise Exception("Link cannot contain ']' not followed by '[' or ']'. Starting with {}".format(contents[last_link_start:i + 10]))
raise Exception(
"Link cannot contain ']' not followed by '[' or ']'. Starting with {}".format(
contents[last_link_start : i + 10]
)
)
if (in_link and not in_link_description):
if in_link and not in_link_description:
# A link's pointer (its target) has no formatting
pass

View File

@ -1,5 +1,15 @@
from .org_dom import (Bold, Code, Headline, Italic, Line, RawLine, Strike,
Text, Underlined, Verbatim)
from .org_dom import (
Bold,
Code,
Headline,
Italic,
Line,
RawLine,
Strike,
Text,
Underlined,
Verbatim,
)
def get_hl_raw_contents(doc: Headline) -> str:
@ -8,7 +18,7 @@ def get_hl_raw_contents(doc: Headline) -> str:
for content in doc.contents:
lines.append(get_raw_contents(content))
raw = ''.join(lines)
raw = "".join(lines)
return raw
@ -19,9 +29,11 @@ def get_rawline_contents(doc: RawLine) -> str:
def get_span_contents(doc: Line) -> str:
return doc.get_raw()
def get_text_contents(doc: Text) -> str:
return doc.get_raw()
def get_raw_contents(doc) -> str:
if isinstance(doc, Headline):
return get_hl_raw_contents(doc)
@ -30,8 +42,8 @@ def get_raw_contents(doc) -> str:
if isinstance(doc, Line):
return get_span_contents(doc)
if isinstance(doc, list):
return ''.join([get_raw_contents(chunk) for chunk in doc])
return "".join([get_raw_contents(chunk) for chunk in doc])
if isinstance(doc, (Text, Bold, Code, Italic, Strike, Underlined, Verbatim)):
return doc.get_raw()
print('Unhandled type: ' + str(doc))
raise NotImplementedError('Unhandled type: ' + str(doc))
print("Unhandled type: " + str(doc))
raise NotImplementedError("Unhandled type: " + str(doc))

View File

@ -1,15 +1,15 @@
from setuptools import setup
setup(
name='org-dom',
version='0.0.1',
description=
'Library to de/serialize org-files and manipulate them in a DOM-like manner.',
author='kenkeiras',
author_email='kenkeiras@codigoparallevar.com',
license='Apache License 2.0',
packages=['org_dom'],
name="org-dom",
version="0.0.1",
description="Library to de/serialize org-files and manipulate them in a DOM-like manner.",
author="kenkeiras",
author_email="kenkeiras@codigoparallevar.com",
license="Apache License 2.0",
packages=["org_dom"],
scripts=[],
include_package_data=False,
install_requires=[],
zip_safe=True)
zip_safe=True,
)

View File

@ -4,37 +4,60 @@ import unittest
from datetime import datetime as DT
from org_dom import dumps, load, loads
from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE,
UNDERLINED, VERBATIM, WEB_LINK, Dom, Tokens)
from utils.dom_assertions import (
BOLD,
CODE,
HL,
ITALIC,
SPAN,
STRIKE,
UNDERLINED,
VERBATIM,
WEB_LINK,
Dom,
Tokens,
)
DIR = os.path.dirname(os.path.abspath(__file__))
class TestSerde(unittest.TestCase):
def test_simple_file_01(self):
with open(os.path.join(DIR, '01-simple.org')) as f:
with open(os.path.join(DIR, "01-simple.org")) as f:
doc = load(f)
ex = Dom(props=[('TITLE', '01-Simple'),
('DESCRIPTION', 'Simple org file'),
('TODO', 'TODO(t) PAUSED(p) | DONE(d)')],
children=(HL(
'First level',
props=[
('ID', '01-simple-first-level-id'),
('CREATED', DT(2020, 1, 1, 1, 1)),
],
content=' First level content\n',
children=[
HL('Second level',
props=[('ID', '01-simple-second-level-id')],
content='\n Second level content\n',
ex = Dom(
props=[
("TITLE", "01-Simple"),
("DESCRIPTION", "Simple org file"),
("TODO", "TODO(t) PAUSED(p) | DONE(d)"),
],
children=(
HL(
"First level",
props=[
("ID", "01-simple-first-level-id"),
("CREATED", DT(2020, 1, 1, 1, 1)),
],
content=" First level content\n",
children=[
HL(
"Second level",
props=[("ID", "01-simple-second-level-id")],
content="\n Second level content\n",
children=[
HL('Third level',
props=[('ID', '01-simple-third-level-id')],
content='\n Third level content\n')
])
])))
HL(
"Third level",
props=[("ID", "01-simple-third-level-id")],
content="\n Third level content\n",
)
],
)
],
)
),
)
ex.assert_matches(self, doc)
@ -55,119 +78,180 @@ class TestSerde(unittest.TestCase):
self.assertEqual(dumps(doc), orig)
def test_markup_file_02(self):
self.maxDiff = 10000
with open(os.path.join(DIR, '02-markup.org')) as f:
with open(os.path.join(DIR, "02-markup.org")) as f:
doc = load(f)
ex = Dom(props=[('TITLE', '02-Markup'),
('DESCRIPTION', 'Simple org file to test markup'),
('TODO', 'TODO(t) PAUSED(p) | DONE(d)')],
children=(HL('First level',
props=[
('ID', '02-markup-first-level-id'),
('CREATED', DT(2020, 1, 1, 1, 1)),
],
content=[
SPAN(" This is a ", BOLD("bold phrase"),
".\n"),
SPAN("\n"),
SPAN(" This is a ",
VERBATIM("verbatim phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", ITALIC("italic phrase"),
".\n"),
SPAN("\n"),
SPAN(" This is a ",
STRIKE("strike-through phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ",
UNDERLINED("underlined phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", CODE("code phrase"),
".\n"),
SPAN("\n"),
SPAN(" This is a nested ", BOLD(["bold ", VERBATIM(["verbatim ", ITALIC(["italic ", STRIKE(["strike ", UNDERLINED(["underlined ", CODE("code ."), " ."]), " ."]), " ."]), " ."]), " ."])),
SPAN("\n"),
SPAN("\n"),
# THIS IS INTERLEAVED, not nested
SPAN([" This is a interleaved ",
Tokens.BOLD_START,
"bold ",
Tokens.VERBATIM_START,
"verbatim ",
Tokens.ITALIC_START,
"italic ",
Tokens.STRIKE_START,
"strike ",
Tokens.UNDERLINED_START,
"underlined ",
Tokens.CODE_START,
"code .",
Tokens.BOLD_END,
" .",
Tokens.VERBATIM_END,
" .",
Tokens.ITALIC_END,
" .",
Tokens.STRIKE_END,
" .",
Tokens.UNDERLINED_END,
" .",
Tokens.CODE_END,
"\n"]),
SPAN("\n"),
SPAN(" This is a _ non-underlined phrase because an incorrectly placed content _.\n"),
SPAN("\n"),
SPAN(" This is a _ non-underlined phrase because an incorrectly placed content beginning_.\n"),
SPAN("\n"),
SPAN(""),
SPAN(" This is a _non-underlined phrase because an incorrectly placed content end _.\n"),
SPAN("\n"),
SPAN(""),
SPAN(" This is a _non-underlined phrase because the lack of an end.\n"),
SPAN("\n"),
SPAN("\n"),
SPAN(" This is a _non-underlined phrase because an empty line between beginning and\n"),
SPAN("\n"),
SPAN(""),
SPAN(" end._\n"),
])))
ex = Dom(
props=[
("TITLE", "02-Markup"),
("DESCRIPTION", "Simple org file to test markup"),
("TODO", "TODO(t) PAUSED(p) | DONE(d)"),
],
children=(
HL(
"First level",
props=[
("ID", "02-markup-first-level-id"),
("CREATED", DT(2020, 1, 1, 1, 1)),
],
content=[
SPAN(" This is a ", BOLD("bold phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", VERBATIM("verbatim phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", ITALIC("italic phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", STRIKE("strike-through phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", UNDERLINED("underlined phrase"), ".\n"),
SPAN("\n"),
SPAN(" This is a ", CODE("code phrase"), ".\n"),
SPAN("\n"),
SPAN(
" This is a nested ",
BOLD(
[
"bold ",
VERBATIM(
[
"verbatim ",
ITALIC(
[
"italic ",
STRIKE(
[
"strike ",
UNDERLINED(
[
"underlined ",
CODE("code ."),
" .",
]
),
" .",
]
),
" .",
]
),
" .",
]
),
" .",
]
),
),
SPAN("\n"),
SPAN("\n"),
# THIS IS INTERLEAVED, not nested
SPAN(
[
" This is a interleaved ",
Tokens.BOLD_START,
"bold ",
Tokens.VERBATIM_START,
"verbatim ",
Tokens.ITALIC_START,
"italic ",
Tokens.STRIKE_START,
"strike ",
Tokens.UNDERLINED_START,
"underlined ",
Tokens.CODE_START,
"code .",
Tokens.BOLD_END,
" .",
Tokens.VERBATIM_END,
" .",
Tokens.ITALIC_END,
" .",
Tokens.STRIKE_END,
" .",
Tokens.UNDERLINED_END,
" .",
Tokens.CODE_END,
"\n",
]
),
SPAN("\n"),
SPAN(
" This is a _ non-underlined phrase because an incorrectly placed content _.\n"
),
SPAN("\n"),
SPAN(
" This is a _ non-underlined phrase because an incorrectly placed content beginning_.\n"
),
SPAN("\n"),
SPAN(""),
SPAN(
" This is a _non-underlined phrase because an incorrectly placed content end _.\n"
),
SPAN("\n"),
SPAN(""),
SPAN(
" This is a _non-underlined phrase because the lack of an end.\n"
),
SPAN("\n"),
SPAN("\n"),
SPAN(
" This is a _non-underlined phrase because an empty line between beginning and\n"
),
SPAN("\n"),
SPAN(""),
SPAN(" end._\n"),
],
)
),
)
ex.assert_matches(self, doc)
def test_links_file_03(self):
with open(os.path.join(DIR, '03-links.org')) as f:
with open(os.path.join(DIR, "03-links.org")) as f:
doc = load(f)
links = list(doc.get_links())
self.assertEqual(len(links), 2)
self.assertEqual(links[0].value, 'https://codigoparallevar.com/1')
self.assertEqual(links[0].description, 'web link')
self.assertEqual(links[0].value, "https://codigoparallevar.com/1")
self.assertEqual(links[0].description, "web link")
self.assertEqual(links[1].value, 'https://codigoparallevar.com/2')
self.assertEqual(links[1].description, 'web link')
ex = Dom(props=[('TITLE', '03-Links'),
('DESCRIPTION', 'Simple org file to test links'),
('TODO', 'TODO(t) PAUSED(p) | DONE(d)')],
children=(HL('First level',
props=[
('ID', '03-markup-first-level-id'),
('CREATED', DT(2020, 1, 1, 1, 1)),
],
content=[
SPAN(" This is a ", WEB_LINK("web link", "https://codigoparallevar.com/1"),
".\n"),
SPAN("\n"),
SPAN(" This is a ", ITALIC(["italized ", WEB_LINK("web link", "https://codigoparallevar.com/2")]),
".\n"),
])))
self.assertEqual(links[1].value, "https://codigoparallevar.com/2")
self.assertEqual(links[1].description, "web link")
ex = Dom(
props=[
("TITLE", "03-Links"),
("DESCRIPTION", "Simple org file to test links"),
("TODO", "TODO(t) PAUSED(p) | DONE(d)"),
],
children=(
HL(
"First level",
props=[
("ID", "03-markup-first-level-id"),
("CREATED", DT(2020, 1, 1, 1, 1)),
],
content=[
SPAN(
" This is a ",
WEB_LINK("web link", "https://codigoparallevar.com/1"),
".\n",
),
SPAN("\n"),
SPAN(
" This is a ",
ITALIC(
[
"italized ",
WEB_LINK(
"web link", "https://codigoparallevar.com/2"
),
]
),
".\n",
),
],
)
),
)
ex.assert_matches(self, doc)

View File

@ -2,8 +2,17 @@ import collections
import unittest
from datetime import datetime
from org_dom import (Bold, Code, Italic, Line, Strike, Text, Underlined,
Verbatim, get_raw_contents)
from org_dom import (
Bold,
Code,
Italic,
Line,
Strike,
Text,
Underlined,
Verbatim,
get_raw_contents,
)
def timestamp_to_datetime(ts):