#!/usr/bin/env python3 import json import html import logging import os import sys import uuid import xapian import shutil from datetime import datetime import org_rw from org_rw import OrgTime, dom, Link from org_rw import dump as dump_org from org_rw import load as load_org from org_rw import token_list_to_raw EXTENSIONS = [ ".org", ".org.txt", ] MIN_HIDDEN_HEADLINE_LEVEL = 2 def load_all(top_dir_relative): top = os.path.abspath(top_dir_relative) docs = [] for root, dirs, files in os.walk(top): for name in files: if ".org" not in name: continue path = os.path.join(root, name) try: doc = load_org(open(path), extra_cautious=True) docs.append(doc) except Exception as err: import traceback traceback.print_exc() print(f"== On {path}") sys.exit(1) logging.info("Collected {} files".format(len(docs))) return docs def main(src_top, dest_top): docs = load_all(src_top) files_generated = 0 doc_to_headline_remapping = {} os.makedirs(dest_top, exist_ok=True) graph = {} for doc in docs: relpath = os.path.relpath(doc.path, src_top) changed = False headlines = list(doc.getAllHeadlines()) related = None i = len(headlines) while i > 0: i -= 1 headline = headlines[i] if headline.title.strip().lower() == "related" and headline.depth == 1: if related is not None: print( "Found duplicated related: {} vs {}".format( related.id, headline.id ) ) assert related is None related = headline headlines.pop(i) for headline in headlines: if headline.id is None: headline.id = str(uuid.uuid4()) changed = True if changed: print("Updated", relpath) save_changes(doc) if not relpath.startswith("public/"): # print("Skip:", relpath) continue main_headline = None topHeadlines = doc.getTopHeadlines() if ((len(topHeadlines) == 1 and related is None) or (len(topHeadlines) == 2 and related is not None)): main_headline = [h for h in topHeadlines if h != related][0] if doc.id is not None: endpath = os.path.join(dest_top, doc.id + ".node.html") with open(endpath, "wt") as f: doc_to_headline_remapping['id:' + 
doc.id] = 'id:' + main_headline.id f.write(as_document(render(main_headline, doc, headlineLevel=0))) files_generated += 1 elif doc.id is not None: logging.error("Cannot render document from id: {}. {} headlines {} related".format( relpath, len(topHeadlines), 'with' if related is not None else 'without' )) for headline in headlines: endpath = os.path.join(dest_top, headline.id + ".node.html") links = [] headline_links = list(headline.get_links()) if headline == main_headline and related is not None: headline_links.extend(list(related.get_links())) for l in headline_links: if l.value.startswith('http://') or l.value.startswith('https://'): pass # Ignore for now, external URL elif l.value.startswith('id:'): links.append({'target': l.value}) elif l.value.startswith('attachment:'): pass # Ignore, attachment elif l.value.startswith('file:'): pass # Ignore, attachment elif l.value.startswith('notmuch:'): pass # Ignore, mail elif l.value.startswith('orgit-rev:'): pass # Ignore, mail elif l.value.startswith('*'): pass # Ignore, internal elif not ':' in l.value.split()[0]: pass # Ignore, internal elif l.value.startswith('./'): pass # TODO: Properly handle else: raise NotImplementedError('On document {}, link to {}'.format(doc.path, l.value)) if headline.parent: if isinstance(headline.parent, org_rw.Headline): links.append({ "target": headline.parent.id, "relation": "in" }) graph[headline.id] = { "title": headline.title.strip(), "links": links, "depth": headline.depth, } with open(endpath, "wt") as f: f.write(as_document(render(headline, doc, headlineLevel=0))) files_generated += 1 # Update graph, replace document ids with headline ids for headline_data in graph.values(): for link in headline_data['links']: if link['target'] in doc_to_headline_remapping: link['target'] = doc_to_headline_remapping[link['target']] # Output graph files graphpath = os.path.join(dest_top, "graph.json") graph_explorer_path = os.path.join(dest_top, "graph.html") with open(graphpath, "wt") as f: 
json.dump(obj=graph, fp=f, indent=2) graph_explorer_path = os.path.join(dest_top, "graph.html") with open(graph_explorer_path, 'wt') as f: with open(os.path.join(os.path.dirname(os.path.abspath(dest_top)), 'static', 'graph_explorer.html'), 'rt') as template: source = template.read() f.write(source.replace('', json.dumps(graph))) logging.info("Generated {} files".format(files_generated)) # Generate index files t0 = datetime.utcnow() logging.info("Generating text index...") xapian_db = os.path.join(dest_top, "xapian") if os.path.exists(xapian_db): shutil.rmtree(xapian_db) db = xapian.WritableDatabase(xapian_db, xapian.DB_CREATE) indexer = xapian.TermGenerator() stemmer = xapian.Stem("english") indexer.set_stemmer(stemmer) docid_to_node = {} for doc in docs: relpath = os.path.relpath(doc.path, src_top) if not relpath.startswith("public/"): # print("Skip:", relpath) continue changed = False for hl in doc.getAllHeadlines(): xapian_doc = xapian.Document() content = "\n".join(doc.dump_headline(hl)) xapian_doc.set_data(content) indexer.set_document(xapian_doc) indexer.index_text(content) doc_id = db.add_document(xapian_doc) docid_to_node[doc_id] = { 'hl': hl.id, 'doc': doc.path } docid_map_path = os.path.join(xapian_db, "docid_map.json") with open(docid_map_path, 'wt') as f: json.dump(docid_to_node, f) logging.info("Text index generated in {}".format(datetime.utcnow() - t0)) def print_tree(tree, indentation=0): return for element in tree: print(" " * indentation + "- " + str(type(element))) if "children" in dir(element): if len(element.children) > 0: print_tree(element.children, indentation + 1) print() def render_property_drawer(element, acc): pass def render_logbook_drawer(element, acc): pass def render_property_node(element, acc): pass def render_list_group(element, acc): acc.append("
')
acc.append(html.escape(element.lines))
acc.append('
')
def render_results_block(element, acc):
    """Render a results drawer node; currently a deliberate no-op.

    Args:
        element: The results drawer node (unused for now).
        acc: List of HTML fragments being accumulated; left untouched.
    """
    # TODO: Render the drawer's children between an opening and a closing
    # wrapper fragment, roughly:
    #     acc.append('')
    #     render_tree(element.children, acc)
    #     acc.append('')
    pass
def render_text(element, acc):
    """Render a text node into ``acc``.

    The node's ``content`` tokens are rendered by ``render_text_tokens``,
    bracketed by an opening and a closing fragment (both are empty strings
    as the code stands).

    Args:
        element: Text node exposing a ``content`` token sequence.
        acc: List of HTML fragments, extended in place.
    """
    opening = ''
    closing = ''
    acc.append(opening)
    render_text_tokens(element.content, acc)
    acc.append(closing)
def render_text_tokens(tokens, acc):
    """Append the HTML rendering of a sequence of text tokens to ``acc``.

    Args:
        tokens: Iterable of tokens. Plain ``str`` chunks and ``Link``
            instances are rendered; any other token kind is skipped.
        acc: List of HTML fragments, extended in place.
    """
    for chunk in tokens:
        if isinstance(chunk, str):
            # Escape the text so literal '<', '>' or '&' in a note cannot
            # break (or inject) markup in the generated page.
            acc.append('{} '.format(html.escape(chunk)))
        elif isinstance(chunk, Link):
            link_target = chunk.value
            if link_target.startswith('id:'):
                # Internal link: point at the "<id>.node.html" page.
                link_target = './' + link_target[3:] + '.node.html'
            description = chunk.description
            if description is None:
                # Links without a description show their raw target.
                description = chunk.value
            # Emit both the target and the description; a single-placeholder
            # format string would silently drop the description.
            acc.append('<a href="{}">{}</a>'.format(
                html.escape(link_target),
                html.escape(description),
            ))
        # Any other token kind is currently skipped rather than rejected.
def render_tag(element, acc):
    """Render one DOM node into ``acc`` with the renderer for its type.

    The lookup is keyed on the exact ``type(element)``, so subclasses of
    the listed node classes do not match, and a type that is not listed
    raises ``KeyError``.

    Args:
        element: DOM node to render.
        acc: List of HTML fragments, passed through to the renderer.

    Returns:
        Whatever the selected renderer returns.
    """
    renderers = {
        dom.PropertyDrawerNode: render_property_drawer,
        dom.LogbookDrawerNode: render_logbook_drawer,
        dom.PropertyNode: render_property_node,
        dom.ListGroupNode: render_list_group,
        dom.ListItem: render_list_item,
        dom.CodeBlock: render_code_block,
        dom.Text: render_text,
        dom.ResultsDrawerNode: render_results_block,
    }
    renderer = renderers[type(element)]
    return renderer(element, acc)
def render_tree(tree, acc):
    """Render every node of ``tree``, in order, into ``acc``.

    Args:
        tree: Iterable of DOM nodes.
        acc: List of HTML fragments, handed to ``render_tag`` for each node.
    """
    for node in tree:
        render_tag(node, acc)
def render(headline, doc, headlineLevel):
try:
dom = headline.as_dom()
except:
logging.error("Error generating DOM for {}".format(doc.path))
raise
print_tree(dom)
content = []
render_tree(dom, content)
for child in headline.children:
content.append(render(child, doc, headlineLevel=headlineLevel+1))
if headline.state is None:
state = ""
else:
state = f'{headline.state}'
if headline.is_todo:
todo_state = "todo"
else:
todo_state = "done"
display_state = 'collapsed'
if headlineLevel < MIN_HIDDEN_HEADLINE_LEVEL:
display_state = 'expanded'
return f"""