(In progress) Add support for SRC code blocks.

- Add tests for blocks.
- Add regexps for `#+BEGIN_SRC`, `#+END_SRC` and the `:results:` drawer.
- Correctly handle Headlines with split contents.
This commit is contained in:
Sergio Martínez Portela 2020-12-04 00:04:56 +01:00
parent 1c0ecbf8c6
commit a5bfeadfeb
3 changed files with 150 additions and 31 deletions

View File

@ -52,7 +52,7 @@ PROPERTY_DRAWER_RE = re.compile(
LOGBOOK_DRAWER_RE = re.compile( LOGBOOK_DRAWER_RE = re.compile(
r"^(?P<indentation>\s*):LOGBOOK:(?P<end_indentation>\s*)$" r"^(?P<indentation>\s*):LOGBOOK:(?P<end_indentation>\s*)$"
) )
DRAWER_END_RE = re.compile(r"^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$") DRAWER_END_RE = re.compile(r"^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$", re.I)
NODE_PROPERTIES_RE = re.compile( NODE_PROPERTIES_RE = re.compile(
r"^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$" r"^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:(?P<spacing>\s*)(?P<value>.*)$"
) )
@ -62,6 +62,12 @@ BASE_TIME_STAMP_RE = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[
ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE)) ACTIVE_TIME_STAMP_RE = re.compile(r"<{}>".format(BASE_TIME_STAMP_RE))
INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE)) INACTIVE_TIME_STAMP_RE = re.compile(r"\[{}\]".format(BASE_TIME_STAMP_RE))
# Org-Babel
# Matched case-insensitively: Org accepts `#+begin_src` as well as
# `#+BEGIN_SRC`, and the results drawer may be written `:results:` or
# `:RESULTS:`. This mirrors the re.I flag on DRAWER_END_RE, which closes it.
# BEGIN_SRC_RE exposes the optional text after the marker (language and
# header arguments) as the named group `content`; it is None when absent.
BEGIN_SRC_RE = re.compile(r"^\s*#\+BEGIN_SRC(\s+(?P<content>.*))?$", re.I)
END_SRC_RE = re.compile(r"^\s*#\+END_SRC\s*$", re.I)
RESULTS_DRAWER_RE = re.compile(r"^\s*:results:\s*$", re.I)
# BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?', # BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
# r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?') # r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
@ -552,6 +558,35 @@ def parse_contents(raw_contents: List[RawLine]):
if len(raw_contents) == 0: if len(raw_contents) == 0:
return [] return []
blocks = []
current_block = []
for line in raw_contents:
if len(current_block) == 0:
# Seed the first block
current_line = line.linenum
current_block.append(line)
else:
if line.linenum == current_line + 1:
# Continue with the current block
current_line = line.linenum
current_block.append(line)
else:
# Mark the finishing block as not the last line
current_block.append(RawLine(current_line + 1, ''))
# Split the blocks
blocks.append(current_block)
current_line = line.linenum
current_block = [line]
# Check that the current block is not left behind
if len(current_block) > 0:
blocks.append(current_block)
return [parse_content_block(block) for block in blocks]
def parse_content_block(raw_contents: List[RawLine]):
contents_buff = [] contents_buff = []
for line in raw_contents: for line in raw_contents:
contents_buff.append(line.line) contents_buff.append(line.line)
@ -576,7 +611,7 @@ def parse_contents(raw_contents: List[RawLine]):
elif tok_type == TOKEN_TYPE_CLOSE_LINK: elif tok_type == TOKEN_TYPE_CLOSE_LINK:
contents.append(LinkToken(LinkTokenType.CLOSE)) contents.append(LinkToken(LinkTokenType.CLOSE))
return [Text(contents, current_line)] return Text(contents, current_line)
def parse_headline(hl) -> Headline: def parse_headline(hl) -> Headline:
@ -776,6 +811,7 @@ class OrgDomReader:
"children": [], "children": [],
"keywords": [], "keywords": [],
"properties": [], "properties": [],
"results": [], # TODO: Move to each specific code block
"logbook": [], "logbook": [],
"structural": [], "structural": [],
} }
@ -813,10 +849,28 @@ class OrgDomReader:
else: else:
self.headline_hierarchy[-1]["contents"].append(raw) self.headline_hierarchy[-1]["contents"].append(raw)
def add_begin_src_line(self, linenum: int, match: re.Match) -> int:
    """Store the line that opens a source block as plain content.

    The complete matched text is wrapped in a RawLine tagged with
    `linenum`. It is appended to the "contents" list of the last entry in
    `headline_hierarchy`, or to the reader's own `contents` when no
    headline has been registered yet.
    """
    entry = RawLine(linenum, match.group(0))
    if self.headline_hierarchy:
        destination = self.headline_hierarchy[-1]["contents"]
    else:
        destination = self.contents
    destination.append(entry)
def add_end_src_line(self, linenum: int, match: re.Match) -> int:
    """Store the line that closes a source block as plain content.

    The complete matched text is wrapped in a RawLine tagged with
    `linenum`. It is appended to the "contents" list of the last entry in
    `headline_hierarchy`, or to the reader's own `contents` when no
    headline has been registered yet.
    """
    entry = RawLine(linenum, match.group(0))
    if self.headline_hierarchy:
        destination = self.headline_hierarchy[-1]["contents"]
    else:
        destination = self.contents
    destination.append(entry)
def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int: def add_property_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
self.current_drawer = self.headline_hierarchy[-1]["properties"] self.current_drawer = self.headline_hierarchy[-1]["properties"]
self.headline_hierarchy[-1]["structural"].append((linenum, line)) self.headline_hierarchy[-1]["structural"].append((linenum, line))
def add_results_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
    """Open the results drawer of the most recent headline.

    Makes that headline's "results" list the current drawer and records
    the drawer-opening line, with its line number, among the headline's
    "structural" lines.

    NOTE(review): indexes `headline_hierarchy[-1]` unconditionally, so a
    results drawer appearing before any headline would raise IndexError.
    """
    headline = self.headline_hierarchy[-1]
    self.current_drawer = headline["results"]
    headline["structural"].append((linenum, line))
def add_logbook_drawer_line(self, linenum: int, line: str, match: re.Match) -> int: def add_logbook_drawer_line(self, linenum: int, line: str, match: re.Match) -> int:
self.current_drawer = self.headline_hierarchy[-1]["logbook"] self.current_drawer = self.headline_hierarchy[-1]["logbook"]
self.headline_hierarchy[-1]["structural"].append((linenum, line)) self.headline_hierarchy[-1]["structural"].append((linenum, line))
@ -843,25 +897,40 @@ class OrgDomReader:
def read(self, s, environment):
    """Feed a whole Org document into the reader, line by line.

    The text is split on "\n" and every line is handed to the first
    handler whose regexp matches, tried in the order written below. Lines
    that no pattern recognises are stored as raw lines.

    Args:
        s: Full document text.
        environment: Accepted for interface compatibility; not used here.

    Raises:
        Exception: Anything raised by a handler is re-raised unchanged,
            after logging the offending line with its 1-based number.
    """
    for linenum, line in enumerate(s.split("\n")):
        try:
            if m := RAW_LINE_RE.match(line):
                self.add_raw_line(linenum, line)
            elif m := HEADLINE_RE.match(line):
                self.add_headline(linenum, m)
            elif m := KEYWORDS_RE.match(line):
                self.add_keyword_line(linenum, m)
            elif m := PROPERTY_DRAWER_RE.match(line):
                self.add_property_drawer_line(linenum, line, m)
            elif m := LOGBOOK_DRAWER_RE.match(line):
                self.add_logbook_drawer_line(linenum, line, m)
            elif m := DRAWER_END_RE.match(line):
                self.add_drawer_end_line(linenum, line, m)
            # Drawer openers are tested before NODE_PROPERTIES_RE, whose
            # `:key:` shape also fits a bare `:results:` line.
            elif m := RESULTS_DRAWER_RE.match(line):
                self.add_results_drawer_line(linenum, line, m)
            elif m := NODE_PROPERTIES_RE.match(line):
                self.add_node_properties_line(linenum, m)
            # Org-babel
            elif m := BEGIN_SRC_RE.match(line):
                self.add_begin_src_line(linenum, m)
            elif m := END_SRC_RE.match(line):
                self.add_end_src_line(linenum, m)
            # Not captured
            else:
                self.add_raw_line(linenum, line)
        except Exception:
            # Only real errors are reported; KeyboardInterrupt and
            # SystemExit pass through without a misleading "Error line".
            logging.error("Error line {}: {}".format(linenum + 1, line))
            raise
def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True): def loads(s, environment=BASE_ENVIRONMENT, extra_cautious=True):

34
tests/04-code.org Normal file
View File

@ -0,0 +1,34 @@
#+TITLE: 04-Code
#+DESCRIPTION: Simple org file
#+TODO: TODO(t) PAUSED(p) | DONE(d)
* First Item
:PROPERTIES:
:ID: 04-code-first-item-id
:CREATED: [2020-01-01 Wed 01:01]
:END:
#+BEGIN_SRC shell
echo "This is a test"
exit 0 # Exit successfully
#+END_SRC
#+RESULTS:
: This is a test
* Second item
:PROPERTIES:
:ID: 04-code-second-item-id
:CREATED: [2020-01-01 Wed 01:01]
:END:
#+BEGIN_SRC shell :results drawer
echo "This is another test"
exit 0 # Comment
#+END_SRC
#+RESULTS:
:results:
This is another test
:end:

View File

@ -5,19 +5,8 @@ from datetime import datetime as DT
from org_dom import dumps, load, loads from org_dom import dumps, load, loads
from utils.dom_assertions import ( from utils.dom_assertions import (BOLD, CODE, HL, ITALIC, SPAN, STRIKE,
BOLD, UNDERLINED, VERBATIM, WEB_LINK, Dom, Tokens)
CODE,
HL,
ITALIC,
SPAN,
STRIKE,
UNDERLINED,
VERBATIM,
WEB_LINK,
Dom,
Tokens,
)
DIR = os.path.dirname(os.path.abspath(__file__)) DIR = os.path.dirname(os.path.abspath(__file__))
@ -255,3 +244,30 @@ class TestSerde(unittest.TestCase):
) )
ex.assert_matches(self, doc) ex.assert_matches(self, doc)
def test_mimic_write_file_04(self):
    """Loading and dumping 04-code.org must reproduce the file exactly."""
    fixture_path = os.path.join(DIR, "04-code.org")
    with open(fixture_path) as f:
        original_text = f.read()

    self.assertEqual(dumps(loads(original_text)), original_text)
def test_code_file_04(self):
    """Both source blocks of 04-code.org expose their body and result."""
    with open(os.path.join(DIR, "04-code.org")) as f:
        doc = load(f)

    snippets = list(doc.get_code_snippets())
    self.assertEqual(len(snippets), 2)

    # First block: result given as a `: ` fixed-width line.
    first = snippets[0]
    expected_first_body = "\n".join(
        ['echo "This is a test"', "exit 0 # Exit successfully"]
    )
    self.assertEqual(first.content, expected_first_body)
    self.assertEqual(first.result, "This is a test")

    # Second block: result wrapped in a `:results:` drawer.
    second = snippets[1]
    expected_second_body = "\n".join(
        ['echo "This is another test"', "exit 0 # Comment"]
    )
    self.assertEqual(second.content, expected_second_body)
    self.assertEqual(second.result, "This is another test")