#!/usr/bin/env python3
"""Render org-mode notes to HTML node pages, a link graph and a text index.

Usage: SCRIPT SOURCE_TOP DEST_TOP

The script loads the org files found under SOURCE_TOP, gives every headline
that lacks one a fresh UUID (saving the org file back when that happens) and,
for documents whose path relative to SOURCE_TOP starts with ``public/``:

* writes one ``ID.node.html`` file per headline (and per document id),
* writes ``graph.json`` and ``graph.html`` describing links between nodes,
* builds a Xapian full-text index under ``DEST_TOP/xapian``.

NOTE(review): in the reviewed copy of this file several string literals are
empty (``""``) or contain a single ``{}`` placeholder.  They look like markup
templates; they are kept exactly as seen -- confirm them against the
canonical source before relying on the rendered output.
"""

import html
import json
import logging
import os
import shutil
import sys
import traceback
import uuid
from datetime import datetime

import xapian

import org_rw
from org_rw import OrgTime, dom, Link
from org_rw import dump as dump_org
from org_rw import load as load_org
from org_rw import token_list_to_raw

EXTENSIONS = [
    ".org",
    ".org.txt",
]

# Headlines nested at this level or deeper are marked as 'collapsed'.
MIN_HIDDEN_HEADLINE_LEVEL = 2


def load_all(top_dir_relative):
    """Load every org document found below *top_dir_relative*.

    Walks the directory tree and parses each file whose name contains
    ``.org``.  On the first file that fails to load, the traceback and the
    offending path are printed and the process exits with status 1.

    Returns the list of loaded documents.
    """
    top = os.path.abspath(top_dir_relative)

    docs = []

    for root, dirs, files in os.walk(top):
        for name in files:
            # NOTE(review): this is a substring match, and EXTENSIONS is not
            # consulted here -- confirm whether a suffix match was intended.
            if ".org" not in name:
                continue

            path = os.path.join(root, name)

            try:
                # Close the handle as soon as the document is parsed instead
                # of leaking one descriptor per org file.
                with open(path) as f:
                    doc = load_org(f, extra_cautious=True)
                docs.append(doc)
            except Exception:
                traceback.print_exc()
                print(f"== On {path}")
                sys.exit(1)

    logging.info("Collected {} files".format(len(docs)))
    return docs


def main(src_top, dest_top):
    """Generate node pages, graph files and the text index.

    *src_top* is the directory holding the org files; *dest_top* is the
    output directory (created if missing).

    Raises AssertionError when a document has more than one depth-1
    "related" headline, and NotImplementedError on a link whose scheme is
    not handled.
    """
    docs = load_all(src_top)
    files_generated = 0

    # Maps 'id:DOC_ID' to 'id:MAIN_HEADLINE_ID' for single-topic documents.
    doc_to_headline_remapping = {}

    os.makedirs(dest_top, exist_ok=True)
    graph = {}

    for doc in docs:
        relpath = os.path.relpath(doc.path, src_top)
        changed = False
        headlines = list(doc.getAllHeadlines())
        related = None

        # Walk backwards so popping the "related" headline does not disturb
        # the indices still to be visited.
        i = len(headlines)
        while i > 0:
            i -= 1
            headline = headlines[i]
            if headline.title.strip().lower() == "related" and headline.depth == 1:
                if related is not None:
                    print(
                        "Found duplicated related: {} vs {}".format(
                            related.id, headline.id
                        )
                    )
                    # Raised explicitly (rather than via `assert`) so the
                    # check survives `python -O`.
                    raise AssertionError("duplicated 'related' headline")
                related = headline
                headlines.pop(i)

        for headline in headlines:
            if headline.id is None:
                headline.id = str(uuid.uuid4())
                changed = True

        if changed:
            print("Updated", relpath)
            save_changes(doc)

        if not relpath.startswith("public/"):
            # print("Skip:", relpath)
            continue

        main_headline = None
        topHeadlines = doc.getTopHeadlines()

        # A document has a "main" headline when its only top-level headline
        # (ignoring the optional "related" one) is unique.
        if ((len(topHeadlines) == 1 and related is None)
                or (len(topHeadlines) == 2 and related is not None)):

            main_headline = [h for h in topHeadlines if h != related][0]
            if doc.id is not None:
                endpath = os.path.join(dest_top, doc.id + ".node.html")
                with open(endpath, "wt") as f:
                    doc_to_headline_remapping['id:' + doc.id] = 'id:' + main_headline.id
                    f.write(as_document(render(main_headline, doc, headlineLevel=0)))
                files_generated += 1
        elif doc.id is not None:
            logging.error("Cannot render document from id: {}. {} headlines {} related".format(
                relpath,
                len(topHeadlines),
                'with' if related is not None else 'without'
            ))

        for headline in headlines:
            endpath = os.path.join(dest_top, headline.id + ".node.html")

            links = []
            headline_links = list(headline.get_links())
            # Links listed under "related" are attributed to the main headline.
            if headline == main_headline and related is not None:
                headline_links.extend(list(related.get_links()))

            for link in headline_links:
                if link.value.startswith('http://') or link.value.startswith('https://'):
                    pass  # Ignore for now, external URL
                elif link.value.startswith('id:'):
                    links.append({'target': link.value})
                elif link.value.startswith('attachment:'):
                    pass  # Ignore, attachment
                elif link.value.startswith('file:'):
                    pass  # Ignore, attachment
                elif link.value.startswith('notmuch:'):
                    pass  # Ignore, mail
                elif link.value.startswith('orgit-rev:'):
                    pass  # Ignore, mail
                elif link.value.startswith('*'):
                    pass  # Ignore, internal
                elif ':' not in link.value.split()[0]:
                    pass  # Ignore, internal
                elif link.value.startswith('./'):
                    pass  # TODO: Properly handle
                else:
                    raise NotImplementedError(
                        'On document {}, link to {}'.format(doc.path, link.value)
                    )

            if headline.parent:
                if isinstance(headline.parent, org_rw.Headline):
                    links.append({
                        "target": headline.parent.id,
                        "relation": "in"
                    })

            graph[headline.id] = {
                "title": headline.title.strip(),
                "links": links,
                "depth": headline.depth,
            }

            with open(endpath, "wt") as f:
                f.write(as_document(render(headline, doc, headlineLevel=0)))
            files_generated += 1

    # Update graph, replace document ids with headline ids
    for headline_data in graph.values():
        for link in headline_data['links']:
            if link['target'] in doc_to_headline_remapping:
                link['target'] = doc_to_headline_remapping[link['target']]

    # Output graph files
    graphpath = os.path.join(dest_top, "graph.json")
    graph_explorer_path = os.path.join(dest_top, "graph.html")
    with open(graphpath, "wt") as f:
        json.dump(obj=graph, fp=f, indent=2)

    # Read the template before opening the output so a missing template does
    # not leave a truncated graph.html behind.
    template_path = os.path.join(
        os.path.dirname(os.path.abspath(dest_top)),
        'static',
        'graph_explorer.html',
    )
    with open(template_path, 'rt') as template:
        source = template.read()
    with open(graph_explorer_path, 'wt') as f:
        # NOTE(review): the placeholder being replaced is an empty string in
        # the reviewed copy; str.replace with an empty pattern inserts the
        # replacement between every character.  Confirm the marker text.
        f.write(source.replace('', json.dumps(graph)))
    logging.info("Generated {} files".format(files_generated))

    # Generate index files
    t0 = datetime.utcnow()
    logging.info("Generating text index...")
    xapian_db = os.path.join(dest_top, "xapian")
    # Start from scratch: drop any index left by a previous run.
    if os.path.exists(xapian_db):
        shutil.rmtree(xapian_db)
    db = xapian.WritableDatabase(xapian_db, xapian.DB_CREATE)

    indexer = xapian.TermGenerator()
    stemmer = xapian.Stem("english")
    indexer.set_stemmer(stemmer)

    # Maps the numeric Xapian document id to the headline it was built from.
    docid_to_node = {}

    for doc in docs:
        relpath = os.path.relpath(doc.path, src_top)

        if not relpath.startswith("public/"):
            # print("Skip:", relpath)
            continue

        for hl in doc.getAllHeadlines():
            xapian_doc = xapian.Document()
            content = "\n".join(doc.dump_headline(hl))

            xapian_doc.set_data(content)
            indexer.set_document(xapian_doc)
            indexer.index_text(content)
            doc_id = db.add_document(xapian_doc)
            docid_to_node[doc_id] = {'hl': hl.id, 'doc': doc.path}

    # Flush and release the index explicitly instead of relying on the
    # object being garbage-collected.
    db.close()

    docid_map_path = os.path.join(xapian_db, "docid_map.json")
    with open(docid_map_path, 'wt') as f:
        json.dump(docid_to_node, f)

    logging.info("Text index generated in {}".format(datetime.utcnow() - t0))


def print_tree(tree, indentation=0):
    """Debug helper that prints the node types of *tree*.

    Currently disabled: the leading ``return`` makes it a no-op.  Remove it
    to get the dump back while debugging.
    """
    return
    for element in tree:
        print(" " * indentation + "- " + str(type(element)))
        if "children" in dir(element):
            if len(element.children) > 0:
                print_tree(element.children, indentation + 1)
                print()


# Every render_* function below takes a DOM element and appends output
# fragments to the list *acc*; the fragments are joined by render().

def render_property_drawer(element, acc):
    """Property drawers produce no output."""
    pass


def render_logbook_drawer(element, acc):
    """Logbook drawers produce no output."""
    pass


def render_property_node(element, acc):
    """Property nodes produce no output."""
    pass


def render_list_group(element, acc):
    """Append the fragment for a list group to *acc*."""
    acc.append("")


def render_list_item(element, acc):
    """Append a list item (optional escaped tag, then its content) to *acc*."""
    acc.append("\n  • ")
    if element.tag is not None:
        acc.append("")
        acc.append(html.escape(element.tag))
        acc.append("")

    acc.append("")
    render_text_tokens(element.content, acc)
    acc.append("\n  • ")


def render_code_block(element, acc):
    """Append the HTML-escaped lines of a code block to *acc*."""
    acc.append('\n    ')
    acc.append(html.escape(element.lines))
    acc.append('\n    ')


def render_results_block(element, acc):
    """Results drawers produce no output yet."""
    # TODO: render the block by delegating to render_tree(element.children, acc).
    pass


def render_text(element, acc):
    """Append the tokens of a text element to *acc*."""
    acc.append('')
    render_text_tokens(element.content, acc)
    acc.append('')


def render_text_tokens(tokens, acc):
    """Append each token of *tokens* to *acc*.

    Plain strings are HTML-escaped and followed by a space.  ``Link`` tokens
    are rendered from their target and description; ``id:`` targets are
    rewritten to the matching ``./ID.node.html`` page.  Tokens of any other
    type are skipped.
    """
    for chunk in tokens:
        if isinstance(chunk, str):
            # Escape raw text like every other renderer does, so characters
            # such as '<' or '&' in a note cannot break the generated page.
            acc.append('{} '.format(html.escape(chunk)))
        elif isinstance(chunk, Link):
            link_target = chunk.value
            if link_target.startswith('id:'):
                link_target = './' + link_target[3:] + '.node.html'
            description = chunk.description
            if description is None:
                description = chunk.value

            # NOTE(review): the template has one placeholder but receives
            # two arguments in the reviewed copy, so only the target is
            # emitted -- confirm the template text.
            acc.append('{}'.format(
                html.escape(link_target),
                html.escape(description),
            ))
        # else:
        #     raise NotImplementedError('TextToken: {}'.format(chunk))


def render_tag(element, acc):
    """Dispatch *element* to the renderer registered for its exact type.

    Raises KeyError for element types without a renderer.
    """
    return {
        dom.PropertyDrawerNode: render_property_drawer,
        dom.LogbookDrawerNode: render_logbook_drawer,
        dom.PropertyNode: render_property_node,
        dom.ListGroupNode: render_list_group,
        dom.ListItem: render_list_item,
        dom.CodeBlock: render_code_block,
        dom.Text: render_text,
        dom.ResultsDrawerNode: render_results_block,
    }[type(element)](element, acc)


def render_tree(tree, acc):
    """Render every element of *tree* into *acc*, in order."""
    for element in tree:
        render_tag(element, acc)


def render(headline, doc, headlineLevel):
    """Return the rendered string for *headline* and, recursively, its children.

    *doc* is only used to report the document path when the headline's DOM
    cannot be built (the error is logged and re-raised).  *headlineLevel* is
    the nesting depth relative to the page root, starting at 0.
    """
    try:
        # Named dom_tree so it does not shadow the imported `dom` module.
        dom_tree = headline.as_dom()
    except Exception:
        logging.error("Error generating DOM for {}".format(doc.path))
        raise

    print_tree(dom_tree)

    content = []
    render_tree(dom_tree, content)
    for child in headline.children:
        content.append(render(child, doc, headlineLevel=headlineLevel+1))

    if headline.state is None:
        state = ""
    else:
        state = f'{headline.state}'

    # NOTE(review): todo_state and display_state are computed but not
    # interpolated by the template as it appears in the reviewed copy --
    # presumably they belong in the page markup; confirm.
    if headline.is_todo:
        todo_state = "todo"
    else:
        todo_state = "done"

    display_state = 'collapsed'
    if headlineLevel < MIN_HIDDEN_HEADLINE_LEVEL:
        display_state = 'expanded'

    return f"""

    {state} {html.escape(headline.title)}

    {''.join(content)}
    """


def as_document(html):
    """Wrap the rendered fragment *html* into a full document string.

    The parameter shadows the ``html`` module inside this function only.
    """
    return f""" {html} """


def save_changes(doc):
    """Write *doc* back to the org file it was loaded from."""
    assert doc.path is not None
    with open(doc.path, "wt") as f:
        dump_org(doc, f)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]))
        # Wrong usage is an error: report it through the exit status, and
        # use sys.exit rather than the site-provided `exit` helper.
        sys.exit(1)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    main(sys.argv[1], sys.argv[2])