Initial commit, simplistic parsing.
This commit is contained in:
commit
d29058cb5e
138
.gitignore
vendored
Normal file
138
.gitignore
vendored
Normal file
@ -0,0 +1,138 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
1
org_dom/__init__.py
Normal file
1
org_dom/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .org_dom import OrgDom, load, loads
|
211
org_dom/org_dom.py
Normal file
211
org_dom/org_dom.py
Normal file
@ -0,0 +1,211 @@
|
||||
import re
|
||||
import collections
|
||||
from typing import List
|
||||
|
||||
# Default environment handed to the parser by load()/loads().
# Keys are named after Emacs org-mode variables.
BASE_ENVIRONMENT = {
    'org-footnote-section': 'Footnotes',
    # In-buffer option keywords, each including its trailing colon.
    # NOTE(review): SELECT_TAGS/EXCLUDE_TAGS appear twice; the repetition is
    # kept as written so the tuple contents stay as the author listed them.
    'org-options-keywords': (
        "ARCHIVE:",
        "AUTHOR:",
        "BIND:",
        "CATEGORY:",
        "COLUMNS:",
        "CREATOR:",
        "DATE:",
        "DESCRIPTION:",
        "DRAWERS:",
        "EMAIL:",
        "EXCLUDE_TAGS:",
        "FILETAGS:",
        "INCLUDE:",
        "INDEX:",
        "KEYWORDS:",
        "LANGUAGE:",
        "MACRO:",
        "OPTIONS:",
        "PROPERTY:",
        "PRIORITIES:",
        "SELECT_TAGS:",
        "SEQ_TODO:",
        "SETUPFILE:",
        "STARTUP:",
        # A comma was missing here, which made Python concatenate the two
        # adjacent literals into the single bogus entry "TAGS:TITLE:".
        "TAGS:",
        "TITLE:",
        "TODO:",
        "TYP_TODO:",
        "SELECT_TAGS:",
        "EXCLUDE_TAGS:",
    ),
}
|
||||
|
||||
|
||||
# --- Line classification patterns (each is applied to one line of input) ---

# "* Title": one or more stars, a space, optional extra spacing, then the rest.
HEADLINE_RE = re.compile(
    r'^(?P<stars>\*+) (?P<spacing>\s*)(?P<line>.*)$'
)

# "#+KEY: value" or "#+KEY[options]: value", optionally indented.
KEYWORDS_RE = re.compile(
    r'^(?P<indentation>\s*)#\+(?P<key>[^:\[]+)'
    r'(\[(?P<options>[^\]]*)\])?:'
    r'(?P<spacing>\s*)(?P<value>.*)$'
)

# ":PROPERTIES:" opens a property drawer; ":END:" closes a drawer.
PROPERTY_DRAWER_RE = re.compile(
    r'^(?P<indentation>\s*):PROPERTIES:(?P<end_indentation>\s*)$'
)
DRAWER_END_RE = re.compile(
    r'^(?P<indentation>\s*):END:(?P<end_indentation>\s*)$'
)

# ":KEY: value" or ":KEY+: value".
NODE_PROPERTIES_RE = re.compile(
    r'^(?P<indentation>\s*):(?P<key>[^+:]+)(?P<plus>\+)?:'
    r'(?P<spacing>\s*)(?P<value>.*)$'
)

# Empty/blank lines, or lines whose first non-space character is not one of
# '#', ':' or '*'.
RAW_LINE_RE = re.compile(
    r'^\s*([^\s#:*]|$)'
)

# --- Timestamps ---

# Body of a timestamp (without the surrounding brackets):
# "YYYY-MM-DD DOW", optionally followed by " H:M" and then "--H:M".
BASE_TIME_STAMP_RE = (
    r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<dow>[^ ]+)'
    r'( (?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2})'
    r'(--(?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?)?'
)

# <...> wraps an active timestamp, [...] an inactive one.
ACTIVE_TIME_STAMP_RE = re.compile(
    r'<{}>'.format(BASE_TIME_STAMP_RE)
)
INACTIVE_TIME_STAMP_RE = re.compile(
    r'\[{}\]'.format(BASE_TIME_STAMP_RE)
)
|
||||
|
||||
# BASE_TIME_RANGE_RE = (r'(?P<start_year>\d{4})-(?P<start_month>\d{2})-(?P<start_day>\d{2}) (?P<start_dow>[^ ]+)((?P<start_hour>\d{1,2}):(?P<start_minute>\d{1,2}))?',
|
||||
# r'(?P<end_year>\d{4})-(?P<end_month>\d{2})-(?P<end_day>\d{2}) (?P<end_dow>[^ ]+)((?P<end_hour>\d{1,2}):(?P<end_minute>\d{1,2}))?')
|
||||
|
||||
# Record describing one headline: where it starts, its depth, and the
# position/value of each of its parts, plus its content and children.
Headline = collections.namedtuple(
    'Headline',
    [
        'start_line',
        'depth',
        'keyword_start',
        'keyword',
        'priority_start',
        'priority',
        'title_start',
        'title',
        'tags_start',
        'tags',
        'content',
        'children',
    ],
)

# A name/value pair with optional options (file keyword or drawer property).
Property = collections.namedtuple(
    'Property',
    ['name', 'value', 'options'],
)

# A pair of time values delimiting a range.
TimeRange = collections.namedtuple(
    'TimeRange',
    ['start_time', 'end_time'],
)

# A single point in time; `dow` is the day-of-week text from the timestamp.
Timestamp = collections.namedtuple(
    'Timestamp',
    ['year', 'month', 'day', 'dow', 'hour', 'minute'],
)
|
||||
|
||||
|
||||
def parse_org_time(value):
    """Parse an org-mode timestamp found at the start of ``value``.

    Both active (``<...>``) and inactive (``[...]``) timestamps are accepted.
    The patterns are applied with ``match``, so text after the closing
    bracket is ignored.

    Returns:
        ``None`` when ``value`` does not start with a timestamp.
        A ``Timestamp`` for a single point in time; ``hour`` and ``minute``
        are ``None`` when the timestamp carries only a date.
        A ``TimeRange`` of two ``Timestamp`` values on the same date when the
        timestamp has a same-day ``HH:MM--HH:MM`` range.
    """
    m = ACTIVE_TIME_STAMP_RE.match(value) or INACTIVE_TIME_STAMP_RE.match(value)
    if m is None:
        return None

    year = int(m.group('year'))
    month = int(m.group('month'))
    day = int(m.group('day'))
    dow = m.group('dow')

    def as_int(text):
        # The time-of-day groups are optional and are None when absent;
        # calling int() on them directly raised TypeError for date-only stamps.
        return None if text is None else int(text)

    start = Timestamp(year, month, day, dow,
                      as_int(m.group('start_hour')),
                      as_int(m.group('start_minute')))

    if m.group('end_hour'):
        end = Timestamp(year, month, day, dow,
                        int(m.group('end_hour')),
                        int(m.group('end_minute')))
        return TimeRange(start, end)

    return start
|
||||
|
||||
|
||||
class OrgDom:
    """Parsed org document: top-level headlines plus file-level keywords."""

    def __init__(self, headlines, keywords):
        # NOTE(review): getProperties() reads each keyword through
        # .group('key'/'value'/'options'), so the elements are expected to
        # behave like regex match objects -- confirm against the annotations.
        self.headlines: List[Headline] = headlines
        self.keywords: List[Property] = keywords

    def serialize(self):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError()

    ## Querying
    def getProperties(self):
        """Return one Property per stored keyword, in stored order."""
        properties = []
        for keyword in self.keywords:
            properties.append(
                Property(
                    name=keyword.group('key'),
                    value=keyword.group('value'),
                    options=keyword.group('options'),
                )
            )
        return properties

    def getTopHeadlines(self):
        """Return the list of top-level headlines (the stored list itself)."""
        return self.headlines
|
||||
|
||||
class OrgDomReader:
    """Line-oriented reader that builds the headline tree of an org document.

    Feed text with read(); call finalize() to obtain the resulting OrgDom.
    Headlines are plain dicts with the keys 'linenum', 'orig', 'depth',
    'title', 'contents', 'children', 'keywords' and 'properties'.
    """

    def __init__(self):
        # Top-level headlines (headline dicts, see add_headline).
        self.headlines: List[dict] = []
        # Match objects of keyword lines found before the first headline.
        self.keywords: List[re.Match] = []
        # Chain of open headlines, outermost first; [-1] is the current one.
        self.headline_hierarchy: List[dict] = []
        # Properties of a drawer that appears before any headline.
        # finalize() does not pass these on to OrgDom.
        self.properties: List[Property] = []
        # List receiving node properties while inside a drawer, else None.
        self.current_drawer = None

    def finalize(self):
        """Return an OrgDom built from the headlines and keywords read so far."""
        return OrgDom(self.headlines, self.keywords)

    ## Construction
    def add_headline(self, linenum: int, match: re.Match) -> None:
        """Register a headline and make it the current one.

        The headline is attached to the closest open headline with a smaller
        depth, or to the top level when there is none. This also covers
        documents that skip levels or do not start at level one.
        """
        depth = len(match.group('stars')) - 1

        headline = {
            'linenum': linenum,
            'orig': match,
            'depth': depth,
            'title': match.group('line'),
            'contents': [],
            'children': [],
            'keywords': [],
            'properties': [],
        }

        # Close every open headline that is a sibling of, or deeper than,
        # the new one.
        while (self.headline_hierarchy
               and self.headline_hierarchy[-1]['depth'] >= depth):
            self.headline_hierarchy.pop()

        if self.headline_hierarchy:
            self.headline_hierarchy[-1]['children'].append(headline)
        else:
            self.headlines.append(headline)
        self.headline_hierarchy.append(headline)

        # A drawer cannot span headlines; drop one left unterminated.
        self.current_drawer = None

    def add_keyword_line(self, linenum: int, match: re.Match) -> None:
        """Store a '#+KEY: value' match on the document or current headline."""
        if not self.headline_hierarchy:
            self.keywords.append(match)
        else:
            self.headline_hierarchy[-1]['keywords'].append(match)

    def add_raw_line(self, linenum: int, line: str) -> None:
        """Append a plain text line to the current headline's 'contents'.

        Lines that appear before the first headline are not stored.
        """
        if self.headline_hierarchy:
            self.headline_hierarchy[-1]['contents'].append(line)

    def add_property_drawer_line(self, linenum: int, match: re.Match) -> None:
        """Open a property drawer for the current headline (or the document)."""
        if self.headline_hierarchy:
            self.current_drawer = self.headline_hierarchy[-1]['properties']
        else:
            self.current_drawer = self.properties

    def add_drawer_end_line(self, linenum: int, match: re.Match) -> None:
        """Close the currently open drawer, if any."""
        self.current_drawer = None

    @staticmethod
    def _split_time_range(value):
        """Split '<a>--<b>' or '[a]--[b]' into its two bracketed halves.

        Returns a 2-tuple of strings, or None when ``value`` does not contain
        exactly one such separator.
        """
        for separator in ('>--<', ']--['):
            if value.count(separator) == 1:
                head, tail = value.split(separator)
                # Give each half back the bracket that the split removed.
                return head + separator[0], separator[-1] + tail
        return None

    def add_node_properties_line(self, linenum: int, match: re.Match) -> None:
        """Store a ':KEY: value' line in the open drawer.

        Values that parse as timestamps are stored as Timestamp/TimeRange,
        anything else as the stripped string. Outside of a drawer the line is
        handled as plain text.
        """
        if self.current_drawer is None:
            self.add_raw_line(linenum, match.string)
            return

        key = match.group('key')
        value = match.group('value').strip()

        halves = self._split_time_range(value)
        if halves is not None:
            # Time range with two different dates. Text after the second
            # timestamp is ignored by parse_org_time.
            # @TODO properly consider "=> DURATION" section
            start = parse_org_time(halves[0])
            end = parse_org_time(halves[1])
            if (start is not None) and (end is not None):
                value = TimeRange(start, end)
        else:
            as_time = parse_org_time(value)
            if as_time is not None:
                value = as_time

        self.current_drawer.append(Property(key, value, None))

    def read(self, s, environment):
        """Parse the org text ``s`` line by line into this reader.

        ``environment`` is accepted but not consulted yet.

        Raises:
            NotImplementedError: for a line that matches none of the known
                line patterns.
        """
        for linenum, line in enumerate(s.split('\n')):
            if RAW_LINE_RE.match(line):
                self.add_raw_line(linenum, line)
            elif m := HEADLINE_RE.match(line):
                self.add_headline(linenum, m)
            elif m := KEYWORDS_RE.match(line):
                self.add_keyword_line(linenum, m)
            elif m := PROPERTY_DRAWER_RE.match(line):
                self.add_property_drawer_line(linenum, m)
            elif m := DRAWER_END_RE.match(line):
                self.add_drawer_end_line(linenum, m)
            elif m := NODE_PROPERTIES_RE.match(line):
                self.add_node_properties_line(linenum, m)
            else:
                raise NotImplementedError('{}: ‘{}’'.format(linenum, line))
|
||||
|
||||
|
||||
def loads(s, environment=BASE_ENVIRONMENT):
    """Parse the org-mode text ``s`` and return the resulting OrgDom.

    ``environment`` is handed to the reader unchanged and defaults to
    BASE_ENVIRONMENT.
    """
    reader = OrgDomReader()
    reader.read(s, environment)
    dom = reader.finalize()
    return dom
|
||||
|
||||
|
||||
def load(f, environment=BASE_ENVIRONMENT):
    """Read all of the file-like object ``f`` and parse it with loads()."""
    text = f.read()
    return loads(text, environment)
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
# No external requirements at this point
|
15
setup.py
Normal file
15
setup.py
Normal file
@ -0,0 +1,15 @@
|
||||
from setuptools import setup
|
||||
|
||||
# Distribution metadata for the org-dom package.
setup(
    name='org-dom',
    version='0.0.1',
    description=(
        'Library to de/serialize org-files and manipulate them in a DOM-like manner.'
    ),
    author='kenkeiras',
    author_email='kenkeiras@codigoparallevar.com',
    license='Apache License 2.0',
    # Only the library package is shipped; no scripts or package data.
    packages=['org_dom'],
    scripts=[],
    include_package_data=False,
    # No install-time requirements are declared.
    install_requires=[],
    zip_safe=True,
)
|
25
tests/01-simple.org
Normal file
25
tests/01-simple.org
Normal file
@ -0,0 +1,25 @@
|
||||
#+TITLE: 01-Simple
|
||||
#+DESCRIPTION: Simple org file
|
||||
#+TODO: TODO(t) PAUSED(p) | DONE(d)
|
||||
|
||||
|
||||
* First level
|
||||
:PROPERTIES:
|
||||
:ID: 01-simple-first-level-id
|
||||
:CREATED: [2020-01-01 Wed 01:01]
|
||||
:END:
|
||||
First level content
|
||||
|
||||
** Second level
|
||||
:PROPERTIES:
|
||||
:ID: 01-simple-second-level-id
|
||||
:END:
|
||||
|
||||
Second level content
|
||||
|
||||
*** Third level
|
||||
:PROPERTIES:
|
||||
:ID: 01-simple-third-level-id
|
||||
:END:
|
||||
|
||||
Third level content
|
39
tests/test_dom.py
Normal file
39
tests/test_dom.py
Normal file
@ -0,0 +1,39 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from datetime import datetime as DT
|
||||
|
||||
from org_dom import load, loads
|
||||
from utils.dom_assertions import HL, Dom
|
||||
|
||||
DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class TestSerde(unittest.TestCase):
    """Checks that parsed documents have the expected structure."""

    def test_simple_file_01(self):
        """01-simple.org: three file keywords and three nested headlines."""
        with open(os.path.join(DIR, '01-simple.org')) as f:
            doc = load(f)

        # Expected tree, described from the innermost headline outwards.
        third_level = HL(
            'Third level',
            props=[('ID', '01-simple-third-level-id')],
            content='Third level content',
        )
        second_level = HL(
            'Second level',
            props=[('ID', '01-simple-second-level-id')],
            content='Second level content',
            children=[third_level],
        )
        first_level = HL(
            'First level',
            props=[
                ('ID', '01-simple-first-level-id'),
                ('CREATED', DT(2020, 1, 1, 1, 1)),
            ],
            content='First level content',
            children=[second_level],
        )

        # A single HL is passed as `children`; Dom wraps it in a list.
        ex = Dom(
            props=[
                ('TITLE', '01-Simple'),
                ('DESCRIPTION', 'Simple org file'),
                ('TODO', 'TODO(t) PAUSED(p) | DONE(d)'),
            ],
            children=first_level,
        )

        ex.assert_matches(self, doc)
|
77
tests/utils/dom_assertions.py
Normal file
77
tests/utils/dom_assertions.py
Normal file
@ -0,0 +1,77 @@
|
||||
import collections
|
||||
import unittest
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def timestamp_to_datetime(ts):
    """Convert a timestamp record into a ``datetime``.

    ``ts`` must expose ``year``, ``month``, ``day``, ``hour`` and ``minute``
    attributes. A missing (``None``) hour or minute is treated as 0, so a
    date-only timestamp maps to midnight instead of raising TypeError.
    """
    hour = 0 if ts.hour is None else ts.hour
    minute = 0 if ts.minute is None else ts.minute
    return datetime(ts.year, ts.month, ts.day, hour, minute)
|
||||
|
||||
|
||||
class Dom:
    """Expected shape of a whole document, for use in test assertions."""

    def __init__(self, *, props=None, children=None):
        self.props = props
        # A lone HL is accepted as shorthand for a one-element list.
        self.children = [children] if isinstance(children, HL) else children

    def assert_matches(self, test_case: unittest.TestCase, doc):
        """Assert through ``test_case`` that ``doc`` matches this description.

        ``doc`` must provide getProperties() and getTopHeadlines(). Leaving
        ``props`` or ``children`` as None means "expect none".
        """
        # Check properties
        actual_props = doc.getProperties()
        if self.props is None:
            test_case.assertEqual(len(actual_props), 0)
        else:
            test_case.assertEqual(len(actual_props), len(self.props))

            # Lengths are equal at this point, so pairing covers every entry.
            for actual, expected in zip(actual_props, self.props):
                test_case.assertEqual(actual.name, expected[0])
                test_case.assertEqual(actual.value, expected[1])

        # @TODO: Check properties

        # Check children
        actual_headlines = doc.getTopHeadlines()
        if self.children is None:
            test_case.assertEqual(len(actual_headlines), 0, "Top")
            return

        test_case.assertEqual(len(actual_headlines), len(self.children),
                              "Top")

        for expected_child, actual_headline in zip(self.children,
                                                   actual_headlines):
            expected_child.assert_matches(test_case, actual_headline)
|
||||
|
||||
|
||||
class HL:
    """Expected shape of one headline, for use in test assertions."""

    def __init__(self, title, *, props=None, content=None, children=None):
        self.title = title
        self.props = props
        self.content = content
        self.children = children

    def assert_matches(self, test_case: unittest.TestCase, doc):
        """Assert through ``test_case`` that headline ``doc`` matches.

        ``doc`` is indexed with 'title', 'properties' and 'children'.
        Leaving ``props`` or ``children`` as None means "expect none".
        ``content`` is stored but not compared yet.
        """
        test_case.assertEqual(self.title, doc['title'])

        # Check properties
        if self.props is None:
            test_case.assertEqual(len(doc['properties']), 0)
        else:
            doc_props = doc['properties']
            test_case.assertEqual(len(doc_props), len(self.props))

            for i, prop in enumerate(self.props):
                test_case.assertEqual(doc_props[i].name, prop[0])
                if isinstance(prop[1], datetime):
                    # Parsed timestamps are compared as datetimes.
                    test_case.assertEqual(
                        timestamp_to_datetime(doc_props[i].value), prop[1])
                else:
                    # Previously skipped, which let wrong values pass.
                    test_case.assertEqual(doc_props[i].value, prop[1])

        # @TODO: Check content

        # Check children
        if self.children is None:
            test_case.assertEqual(len(doc['children']), 0)
        else:
            doc_headlines = doc['children']
            test_case.assertEqual(len(doc_headlines), len(self.children),
                                  self.title)

            for i, children in enumerate(self.children):
                children.assert_matches(test_case, doc_headlines[i])
|
18
tox.ini
Normal file
18
tox.ini
Normal file
@ -0,0 +1,18 @@
|
||||
# Tox (http://tox.testrun.org/) is a tool for running tests
|
||||
# in multiple virtualenvs. This configuration file will run the
|
||||
# test suite on all supported python versions. To use it, "pip install tox"
|
||||
# and then run "tox" from this directory.
|
||||
|
||||
[tox]
|
||||
# envlist = py27,py34,py35,py36,py37
|
||||
envlist = py38
|
||||
|
||||
[testenv]
|
||||
commands =
|
||||
python -m pytest --cov-report term-missing --cov org_dom tests
|
||||
deps =
|
||||
-r requirements.txt
|
||||
pytest
|
||||
pytest-cov
|
||||
setenv =
|
||||
PYTHONPATH = {toxinidir}
|
Loading…
Reference in New Issue
Block a user