#!/usr/bin/env python3
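"""Render a collection of org-mode notes as a static HTML site.

Given a source tree of org files (SOURCE_TOP) and an output directory
(DEST_TOP), this script:

- loads every org file with org_rw, assigning a UUID to any headline that
  lacks an ID and writing that change back to the source file,
- renders each headline of the notes under public/ to its own
  <id>.node.html page,
- emits a graph.json describing the links between headlines, plus a
  graph.html explorer built from the static/graph_explorer.html template,
- builds a Xapian full-text index of the public headlines under
  DEST_TOP/xapian, together with a docid_map.json mapping Xapian document
  ids back to headline ids.
"""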

import html
import json
import logging
import os
import shutil
import sys
import uuid
from datetime import datetime

import xapian

import org_rw
from org_rw import OrgTime, dom, Link
from org_rw import dump as dump_org
from org_rw import load as load_org
from org_rw import token_list_to_raw

EXTENSIONS = [
    ".org",
    ".org.txt",
]

MIN_HIDDEN_HEADLINE_LEVEL = 2


def load_all(top_dir_relative):
    """Recursively load every org file under `top_dir_relative`."""
    top = os.path.abspath(top_dir_relative)

    docs = []

    for root, dirs, files in os.walk(top):
        for name in files:
            if ".org" not in name:
                continue

            path = os.path.join(root, name)

            try:
                with open(path) as f:
                    doc = load_org(f, extra_cautious=True)
                docs.append(doc)
            except Exception:
                import traceback

                traceback.print_exc()
                print(f"== On {path}")
                sys.exit(1)

    logging.info("Collected {} files".format(len(docs)))
    return docs


def main(src_top, dest_top):
    docs = load_all(src_top)
    files_generated = 0

    # Maps 'id:<document id>' to 'id:<main headline id>' for documents that
    # collapse into a single node.
    doc_to_headline_remapping = {}

    os.makedirs(dest_top, exist_ok=True)
    graph = {}
    for doc in docs:
        relpath = os.path.relpath(doc.path, src_top)
        changed = False
        headlines = list(doc.getAllHeadlines())
        related = None

        # Pull out the top-level "Related" headline, if any, so it is not
        # rendered as a node of its own (its links are merged into the main
        # headline below).
        i = len(headlines)
        while i > 0:
            i -= 1
            headline = headlines[i]
            if headline.title.strip().lower() == "related" and headline.depth == 1:
                if related is not None:
                    print(
                        "Found duplicated related: {} vs {}".format(
                            related.id, headline.id
                        )
                    )
                assert related is None
                related = headline
                headlines.pop(i)

        # Assign IDs to headlines that lack one and persist the change.
        for headline in headlines:
            if headline.id is None:
                headline.id = str(uuid.uuid4())
                changed = True

        if changed:
            print("Updated", relpath)
            save_changes(doc)

        # Only notes under public/ get rendered.
        if not relpath.startswith("public/"):
            # print("Skip:", relpath)
            continue

        main_headline = None
        topHeadlines = doc.getTopHeadlines()

        # A document collapses into a single node when it has exactly one top
        # headline (not counting the "Related" section).
        if ((len(topHeadlines) == 1 and related is None)
                or (len(topHeadlines) == 2 and related is not None)):

            main_headline = [h for h in topHeadlines if h != related][0]
            if doc.id is not None:
                endpath = os.path.join(dest_top, doc.id + ".node.html")
                with open(endpath, "wt") as f:
                    doc_to_headline_remapping['id:' + doc.id] = 'id:' + main_headline.id

                    f.write(as_document(render(main_headline, doc, headlineLevel=0)))
                    files_generated += 1
        elif doc.id is not None:
            logging.error("Cannot render document {}: {} top headlines, {} related".format(
                relpath,
                len(topHeadlines),
                'with' if related is not None else 'without'
            ))

        # Render each public headline to its own page and record its outgoing
        # links for the graph.
        for headline in headlines:
            endpath = os.path.join(dest_top, headline.id + ".node.html")

            links = []
            headline_links = list(headline.get_links())
            if headline == main_headline and related is not None:
                headline_links.extend(list(related.get_links()))

            for l in headline_links:
                if l.value.startswith('http://') or l.value.startswith('https://'):
                    pass  # Ignore for now, external URL
                elif l.value.startswith('id:'):
                    links.append({'target': l.value})
                elif l.value.startswith('attachment:'):
                    pass  # Ignore, attachment
                elif l.value.startswith('file:'):
                    pass  # Ignore, file
                elif l.value.startswith('notmuch:'):
                    pass  # Ignore, mail
                elif l.value.startswith('orgit-rev:'):
                    pass  # Ignore, git revision
                elif l.value.startswith('*'):
                    pass  # Ignore, internal
                elif ':' not in l.value.split()[0]:
                    pass  # Ignore, internal
                elif l.value.startswith('./'):
                    pass  # TODO: Properly handle
                else:
                    raise NotImplementedError('On document {}, link to {}'.format(doc.path, l.value))

            if headline.parent:
                if isinstance(headline.parent, org_rw.Headline):
                    links.append({
                        "target": headline.parent.id,
                        "relation": "in"
                    })
            graph[headline.id] = {
                "title": headline.title.strip(),
                "links": links,
                "depth": headline.depth,
            }

            with open(endpath, "wt") as f:
                f.write(as_document(render(headline, doc, headlineLevel=0)))
                files_generated += 1

    # Update graph, replacing document ids with headline ids
    for headline_data in graph.values():
        for link in headline_data['links']:
            if link['target'] in doc_to_headline_remapping:
                link['target'] = doc_to_headline_remapping[link['target']]

    # Output graph files
    graphpath = os.path.join(dest_top, "graph.json")
    graph_explorer_path = os.path.join(dest_top, "graph.html")
    with open(graphpath, "wt") as f:
        json.dump(obj=graph, fp=f, indent=2)

    with open(graph_explorer_path, 'wt') as f:
        with open(os.path.join(os.path.dirname(os.path.abspath(dest_top)), 'static', 'graph_explorer.html'), 'rt') as template:
            source = template.read()
        f.write(source.replace('<!-- REPLACE_THIS_WITH_GRAPH -->',
                               json.dumps(graph)))

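    # For reference, each entry written to graph.json has roughly this shape
    # (the identifiers below are illustrative, not taken from real notes):
    #
    #   "2f9c4a1e-...": {
    #     "title": "Some note",
    #     "links": [
    #       {"target": "id:b3d0..."},
    #       {"target": "77aa...", "relation": "in"}
    #     ],
    #     "depth": 1
    #   }
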
logging.info("Generated {} files".format(files_generated))
|
|
|
|
# Generate index files
|
|
t0 = datetime.utcnow()
|
|
logging.info("Generating text index...")
|
|
|
|
xapian_db = os.path.join(dest_top, "xapian")
|
|
if os.path.exists(xapian_db):
|
|
shutil.rmtree(xapian_db)
|
|
db = xapian.WritableDatabase(xapian_db, xapian.DB_CREATE)
|
|
|
|
indexer = xapian.TermGenerator()
|
|
stemmer = xapian.Stem("english")
|
|
indexer.set_stemmer(stemmer)
|
|
|
|
docid_to_node = {}
|
|
|
|
for doc in docs:
|
|
relpath = os.path.relpath(doc.path, src_top)
|
|
|
|
if not relpath.startswith("public/"):
|
|
# print("Skip:", relpath)
|
|
continue
|
|
|
|
changed = False
|
|
for hl in doc.getAllHeadlines():
|
|
xapian_doc = xapian.Document()
|
|
content = "\n".join(doc.dump_headline(hl))
|
|
|
|
xapian_doc.set_data(content)
|
|
indexer.set_document(xapian_doc)
|
|
indexer.index_text(content)
|
|
|
|
doc_id = db.add_document(xapian_doc)
|
|
docid_to_node[doc_id] = { 'hl': hl.id, 'doc': doc.path }
|
|
|
|
docid_map_path = os.path.join(xapian_db, "docid_map.json")
|
|
with open(docid_map_path, 'wt') as f:
|
|
json.dump(docid_to_node, f)
|
|
|
|
logging.info("Text index generated in {}".format(datetime.utcnow() - t0))
|
|
|
|
|
|
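

# Not used by the build itself: a minimal sketch of how the index written above
# could be queried, assuming the standard xapian Python bindings and the on-disk
# layout produced by main() (DEST_TOP/xapian plus its docid_map.json).
def search_index(dest_top, querystring, max_results=10):
    db = xapian.Database(os.path.join(dest_top, "xapian"))

    # docid_map.json maps Xapian docids back to headline ids and source paths.
    with open(os.path.join(dest_top, "xapian", "docid_map.json"), "rt") as f:
        docid_to_node = json.load(f)

    query_parser = xapian.QueryParser()
    query_parser.set_stemmer(xapian.Stem("english"))
    query_parser.set_database(db)
    query_parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = query_parser.parse_query(querystring)

    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # JSON object keys are strings, so the numeric docid is looked up as str.
    return [docid_to_node.get(str(match.docid))
            for match in enquire.get_mset(0, max_results)]

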
def print_tree(tree, indentation=0):
    # Debugging helper; the early return below keeps it disabled.
    return
    for element in tree:
        print(" " * indentation + "- " + str(type(element)))
        if "children" in dir(element):
            if len(element.children) > 0:
                print_tree(element.children, indentation + 1)
    print()


def render_property_drawer(element, acc):
    # Property drawers are not rendered.
    pass


def render_logbook_drawer(element, acc):
    # Logbook drawers are not rendered.
    pass


def render_property_node(element, acc):
    # Property nodes are not rendered.
    pass


def render_list_group(element, acc):
    acc.append("<ul>")
    render_tree(element.children, acc)
    acc.append("</ul>")


def render_list_item(element, acc):
    acc.append("<li>")
    if element.tag is not None:
        acc.append("<span class='tag'>")
        acc.append(html.escape(element.tag))
        acc.append("</span>")

    acc.append("<span class='item'>")
    render_text_tokens(element.content, acc)
    acc.append("</span></li>")


def render_code_block(element, acc):
    acc.append('<pre><code>')
    acc.append(html.escape(element.lines))
    acc.append('</code></pre>')


def render_results_block(element, acc):
    # TODO:
    # acc.append('<pre class="results"><code>')
    # render_tree(element.children, acc)
    # acc.append('</code></pre>')
    pass


def render_text(element, acc):
    acc.append('<span class="text">')
    render_text_tokens(element.content, acc)
    acc.append('</span>')


def render_text_tokens(tokens, acc):
    for chunk in tokens:
        if isinstance(chunk, str):
            # Plain text chunk, emitted as-is; the callers provide the
            # wrapping span.
            acc.append('{} '.format(chunk))
        elif isinstance(chunk, Link):
            link_target = chunk.value
            if link_target.startswith('id:'):
                # Internal links point at the generated per-headline pages.
                link_target = './' + link_target[3:] + '.node.html'
            description = chunk.description
            if description is None:
                description = chunk.value

            acc.append('<a href="{}">{}</a>'.format(
                html.escape(link_target),
                html.escape(description),
            ))
        # else:
        #     raise NotImplementedError('TextToken: {}'.format(chunk))


def render_tag(element, acc):
    # Dispatch on the DOM node type; unknown node types raise KeyError.
    return {
        dom.PropertyDrawerNode: render_property_drawer,
        dom.LogbookDrawerNode: render_logbook_drawer,
        dom.PropertyNode: render_property_node,
        dom.ListGroupNode: render_list_group,
        dom.ListItem: render_list_item,
        dom.CodeBlock: render_code_block,
        dom.Text: render_text,
        dom.ResultsDrawerNode: render_results_block,
    }[type(element)](element, acc)


def render_tree(tree, acc):
    for element in tree:
        render_tag(element, acc)


def render(headline, doc, headlineLevel):
    try:
        tree = headline.as_dom()
    except Exception:
        logging.error("Error generating DOM for {}".format(doc.path))
        raise
    print_tree(tree)

    content = []
    render_tree(tree, content)
    # Nested headlines are rendered recursively inside their parent.
    for child in headline.children:
        content.append(render(child, doc, headlineLevel=headlineLevel + 1))

    if headline.state is None:
        state = ""
    else:
        state = f'<span class="state todo-{headline.is_todo} state-{headline.state}">{headline.state}</span>'

    if headline.is_todo:
        todo_state = "todo"
    else:
        todo_state = "done"

    # Only the first few levels start expanded; deeper headlines stay collapsed
    # until toggled in the browser.
    display_state = 'collapsed'
    if headlineLevel < MIN_HIDDEN_HEADLINE_LEVEL:
        display_state = 'expanded'

    return f"""
<div id="{html.escape(headline.id)}" class="node {todo_state} {display_state}">
    <h1 class="title">
        {state}
        <a href="javascript:toggle_expand('{html.escape(headline.id)}')">
            {html.escape(headline.title)}
        </a>
    </h1>
    <div class='contents'>
        {''.join(content)}
    </div>
</div>
"""


def as_document(body):
    """Wrap a rendered node in a full HTML document with the shared stylesheet
    and the expand/collapse script."""
    return f"""<!DOCTYPE html>
<html>
<head>
    <link href="../static/style.css" rel="stylesheet"/>
    <script type="text/javascript">
        function toggle_expand(header_id) {{
            var e = document.getElementById(header_id);
            if (e.classList.contains('expanded')) {{
                e.classList.add('collapsed');
                e.classList.remove('expanded');
            }}
            else {{
                e.classList.add('expanded');
                e.classList.remove('collapsed');
            }}
        }}
    </script>
</head>
<body>
    {body}
</body>
</html>
"""


def save_changes(doc):
    """Write the (possibly modified) document back to its original path."""
    assert doc.path is not None
    with open(doc.path, "wt") as f:
        dump_org(doc, f)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]))
        sys.exit(0)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    main(sys.argv[1], sys.argv[2])