new-codigoparallevar/scripts/generate.py

#!/usr/bin/env python3

import sqlite3
import time
import json
import html
import logging
import os
import sys
import uuid
from datetime import datetime
import traceback

import inotify.adapters

import org_rw
from org_rw import OrgTime, dom, Link
from org_rw import dump as dump_org
from org_rw import load as load_org
from org_rw import token_list_to_raw

# Recognised org-mode file suffixes.
EXTENSIONS = [
    ".org",
    ".org.txt",
]

# Watch mode is on unless WATCH_AND_REBUILD is set to exactly '0'.
WATCH = os.getenv('WATCH_AND_REBUILD', '1') != '0'

MIN_HIDDEN_HEADLINE_LEVEL = 2
# Id of the headline that is additionally written out as index.html.
INDEX_ID = "ea48ec1d-f9d4-4fb7-b39a-faa7b6e2ba95"
SITE_NAME = "Código para llevar"

# inotify event type names that trigger a rebuild in watch mode.
MONITORED_EVENT_TYPES = (
    'IN_CREATE',
    # 'IN_MODIFY',
    'IN_CLOSE_WRITE',
    'IN_DELETE',
    'IN_MOVED_FROM',
    'IN_MOVED_TO',
    'IN_DELETE_SELF',
    'IN_MOVE_SELF',
)

# Two directory levels above this script.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

STATIC_PATH = os.path.join(ROOT_DIR, 'static')

def is_git_path(path):
    """Tell whether any component of `path` is exactly ``.git``."""
    components = path.split(os.sep)
    return ".git" in components

def create_db(path):
    """Create a fresh full-text-search database at `path`.

    Any existing file at `path` is deleted first, then a new SQLite database
    with the ``note_search`` FTS5 table is created.

    Args:
        path: Location of the SQLite file.

    Returns:
        The open ``sqlite3`` connection.
    """
    # The directory that should hold the database may not exist yet (e.g. on
    # a first build); sqlite3.connect() cannot create missing directories.
    parent = os.path.dirname(os.path.abspath(path))
    os.makedirs(parent, exist_ok=True)

    if os.path.exists(path):
        os.unlink(path)

    db = sqlite3.connect(path)
    db.execute('CREATE VIRTUAL TABLE note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo);')
    return db

def load_all(top_dir_relative):
    """Load every org document found below `top_dir_relative`.

    Walks the directory tree and parses each file whose name contains
    ``.org``. If any file fails to load, the traceback and the offending
    path are printed and the process exits with status 1.

    Args:
        top_dir_relative: Directory to scan; resolved to an absolute path.

    Returns:
        List of the loaded documents.
    """
    top = os.path.abspath(top_dir_relative)

    docs = []

    for root, _dirs, files in os.walk(top):
        for name in files:
            if ".org" not in name:
                continue

            path = os.path.join(root, name)

            try:
                # Close the handle as soon as the document has been parsed.
                with open(path) as f:
                    docs.append(load_org(f, extra_cautious=True))
            except Exception:
                traceback.print_exc()
                print(f"== On {path}")
                sys.exit(1)

    logging.info("Collected {} files".format(len(docs)))
    return docs

def _collect_id_links(headline_links, doc):
    """Return graph link entries for the ``id:`` links in `headline_links`.

    Links with other known schemes, and links whose first word contains no
    ``:``, are ignored. Anything else is reported with a warning that names
    `doc`.
    """
    ignored_prefixes = (
        'http://', 'https://',    # Ignore for now, external URL
        'attachment:', 'file:',   # Ignore, attachment
        'notmuch:',               # Ignore, mail
        'orgit-rev:',
        '*',                      # Ignore, internal
        './',                     # TODO: Properly handle
    )

    links = []
    for l in headline_links:
        value = l.value
        if value.startswith('id:'):
            links.append({'target': value})
            continue
        if value.startswith(ignored_prefixes):
            continue

        # An empty or whitespace-only value has no first word; treat it like
        # any other scheme-less (internal) link instead of failing.
        words = value.split()
        if not words or ':' not in words[0]:
            continue  # Ignore, internal

        logging.warning('On document {}, unknown link to {}'.format(doc.path, value))
    return links


def regen_all(src_top, dest_top, *, docs=None, db=None):
    """Rebuild every generated artifact from the org files under `src_top`.

    Steps, in order:
      * empty the ``note_search`` table;
      * reload all documents from `src_top`;
      * give a fresh UUID to every headline lacking an id (except a depth-1
        headline titled "related") and save documents changed that way;
      * for documents whose path relative to `src_top` starts with
        ``public/``: write ``<id>.node.html`` per headline (and per document
        id when the document has a single main headline), ``index.html`` for
        the headline whose id is INDEX_ID, and index the headline for search;
      * write ``graph.json`` and ``graph.html`` and commit the database.

    Args:
        src_top: Directory scanned for org files.
        dest_top: Output directory; created when missing.
        docs: Ignored; documents are always reloaded from `src_top`.
        db: Open sqlite3 connection holding the ``note_search`` table.
            Required in practice despite the ``None`` default.

    Raises:
        AssertionError: If a document has more than one depth-1 headline
            titled "related".
    """
    files_generated = 0
    cur = db.cursor()
    cur.execute('DELETE FROM note_search;')

    docs = load_all(src_top)
    doc_to_headline_remapping = {}

    os.makedirs(dest_top, exist_ok=True)
    graph = {}
    for doc in docs:
        relpath = os.path.relpath(doc.path, src_top)
        changed = False
        headlines = list(doc.getAllHeadlines())
        related = None

        # Walk backwards so that popping the "related" headline does not
        # disturb the indices still to be visited.
        i = len(headlines)
        while i > 0:
            i -= 1
            headline = headlines[i]
            if headline.title.get_text().strip().lower() == "related" and headline.depth == 1:
                if related is not None:
                    print(
                        "Found duplicated related: {} vs {}".format(
                            related.id, headline.id
                        )
                    )
                    # Explicit raise: a bare `assert` would be skipped under -O.
                    raise AssertionError("Found duplicated related headline")
                related = headline
                headlines.pop(i)

        for headline in headlines:
            if headline.id is None:
                headline.id = str(uuid.uuid4())
                changed = True

        if changed:
            print("Updated", relpath)
            save_changes(doc)

        if not relpath.startswith("public/"):
            # print("Skip:", relpath)
            continue

        main_headline = None
        topHeadlines = doc.getTopHeadlines()

        if ((len(topHeadlines) == 1 and related is None)
            or (len(topHeadlines) == 2 and related is not None)):

            main_headline = [h for h in topHeadlines if h != related][0]
            if doc.id is not None:
                endpath = os.path.join(dest_top, doc.id + ".node.html")
                with open(endpath, "wt") as f:
                    doc_to_headline_remapping['id:' + doc.id] = 'id:' + main_headline.id

                    f.write(render_as_document(main_headline, doc, headlineLevel=0,
                                               title=org_rw.token_list_to_plaintext(main_headline.title.contents)))
                    files_generated += 1
        elif doc.id is not None:
            logging.error("Cannot render document from id: {}. {} headlines {} related".format(
                relpath,
                len(topHeadlines),
                'with' if related is not None else 'without'
            ))

        for headline in headlines:
            endpath = os.path.join(dest_top, headline.id + ".node.html")

            # The main headline also inherits the links listed under "related".
            headline_links = list(headline.get_links())
            if headline == main_headline and related is not None:
                headline_links.extend(list(related.get_links()))

            links = _collect_id_links(headline_links, doc)

            if headline.parent:
                if isinstance(headline.parent, org_rw.Headline):
                    links.append({
                        "target": headline.parent.id,
                        "relation": "in"
                    })
            graph[headline.id] = {
                "title": org_rw.org_rw.token_list_to_plaintext(headline.title.contents).strip(),
                "links": links,
                "depth": headline.depth,
            }

            topLevelHeadline = headline
            while isinstance(topLevelHeadline.parent, org_rw.Headline):
                topLevelHeadline = topLevelHeadline.parent

            # Save for full-text-search
            cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?);''',
                        (
                            headline.id,
                            headline.title.get_text(),
                            ''.join(headline.get_contents('raw')),
                            topLevelHeadline.title.get_text(),
                            headline.is_done,
                            headline.is_todo,
                        ))

            # Render HTML
            with open(endpath, "wt") as f:
                f.write(render_as_document(headline, doc, headlineLevel=0,
                                           title=org_rw.token_list_to_plaintext(headline.title.contents)))
                files_generated += 1

            if headline.id == INDEX_ID:
                index_endpath = os.path.join(dest_top, "index.html")
                with open(index_endpath, "wt") as f:
                    f.write(render_as_document(headline, doc, headlineLevel=0,
                                               title=org_rw.token_list_to_plaintext(headline.title.contents)))
                    files_generated += 1

    # Update graph, replace document ids with headline ids
    for headline_data in graph.values():
        for link in headline_data['links']:
            if link['target'] in doc_to_headline_remapping:
                link['target'] = doc_to_headline_remapping[link['target']]

    # Output graph files
    graphpath = os.path.join(dest_top, "graph.json")
    with open(graphpath, "wt") as f:
        json.dump(obj=graph, fp=f, indent=2)

    # Read the template before opening graph.html for writing, so that a
    # missing template does not leave a truncated output file behind.
    template_path = os.path.join(os.path.dirname(os.path.abspath(dest_top)), '..', 'static', 'graph_explorer.html')
    with open(template_path, 'rt') as template:
        source = template.read()

    graph_explorer_path = os.path.join(dest_top, "graph.html")
    with open(graph_explorer_path, 'wt') as f:
        f.write(source.replace('<!-- REPLACE_THIS_WITH_GRAPH -->',
                               json.dumps(graph)))
    logging.info("Generated {} files".format(files_generated))
    cur.close()
    db.commit()

def main(src_top, dest_top):
    """Build the site once and, in watch mode, rebuild on file changes.

    Args:
        src_top: Directory containing the org sources.
        dest_top: Directory receiving the generated files and the database.

    Returns:
        0 after the initial build when WATCH is false. In watch mode the
        function keeps consuming inotify events instead.
    """
    notifier = inotify.adapters.InotifyTrees([src_top, STATIC_PATH])

    ## Initial load
    t0 = time.time()

    db = create_db(os.path.join(dest_top, 'db.sqlite3'))
    docs = regen_all(src_top, dest_top, db=db)

    if not WATCH:
        logging.info("Build completed in {:.2f}s".format(time.time() - t0))
        return 0

    logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
    ## Updating
    for event in notifier.event_gen(yield_nones=False):
        (_ev, types, directory, file) = event
        if not any(type_name in MONITORED_EVENT_TYPES for type_name in types):
            continue
        if is_git_path(directory):
            continue
        filepath = os.path.join(directory, file)
        print("CHANGED: {}".format(filepath))
        t0 = time.time()
        try:
            docs = regen_all(src_top, dest_top, docs=docs, db=db)
        except (Exception, SystemExit):
            # SystemExit is caught on purpose: load_all() calls sys.exit(1)
            # when a file cannot be parsed, and a half-written file must not
            # stop the watcher. KeyboardInterrupt is left to propagate so
            # Ctrl-C still stops the process during a rebuild.
            logging.error(traceback.format_exc())
            logging.error("Loading new templates failed 😿")
            continue
        logging.info("Updated all in {:.2f}s".format(time.time() - t0))


def print_tree(tree, indentation=0, headline=None):
    """Debug dump of a DOM tree; currently switched off.

    The unconditional ``return`` makes this a no-op. The traversal after it
    is unreachable and only kept so the dump can be re-enabled by hand.
    """
    # if headline and headline.id != INDEX_ID:
    #     return
    return
    child_indentation = indentation + 1
    for node in tree:
        if "children" in dir(node):
            if len(node.children) > 0:
                print_element(node.children, child_indentation, headline)
                print()
            continue

        if "content" in dir(node):
            for piece in node.content:
                print_element(piece, child_indentation, headline)

def print_element(element, indentation, headline):
    """Print one DOM element for debugging, indented by `indentation`.

    Links and plain strings are printed directly; anything else is handed
    to print_tree().
    """
    if isinstance(element, org_rw.Link):
        print(" " * indentation, "Link:", element.get_raw())
        return

    if isinstance(element, str):
        print(" " * indentation, "{" + element + "}", type(element))
        return

    print_tree(element, indentation, headline)


def render_property_drawer(element, acc):
    """Render nothing: property drawers produce no HTML output."""
    return None


def render_logbook_drawer(element, acc):
    """Render nothing: logbook drawers produce no HTML output."""
    return None


def render_property_node(element, acc):
    """Render nothing: property nodes produce no HTML output."""
    return None


def render_list_group(element, acc):
    """Append the group's children to `acc`, wrapped in ``<ul>`` tags."""
    opening, closing = "<ul>", "</ul>"
    acc.append(opening)
    render_tree(element.children, acc)
    acc.append(closing)

def render_table(element, acc):
    """Append the table's children to `acc`, wrapped in ``<table>`` tags."""
    opening, closing = "<table>", "</table>"
    acc.append(opening)
    render_tree(element.children, acc)
    acc.append(closing)

def render_table_row(element, acc):
    """Append one ``<tr>`` holding an HTML-escaped ``<td>`` per cell."""
    acc.append("<tr>")
    for cell_text in element.cells:
        acc.extend(("<td>", html.escape(cell_text), "</td>"))
    acc.append("</tr>")

def render_table_separator_row(element, acc):
    """Append an empty marker row that stands in for a table separator."""
    separator_row = "<tr class='__table-separator'></tr>"
    acc.append(separator_row)

def render_list_item(element, acc):
    """Append an ``<li>`` with an optional tag span followed by the item text."""
    acc.append("<li>")

    tag_tokens = element.tag
    if tag_tokens is not None:
        acc.append("<span class='tag'>")
        render_text_tokens(tag_tokens, acc)
        acc.append("</span>")

    acc.append("<span class='item'>")
    render_text_tokens(element.content, acc)
    acc.append("</span></li>")


def render_code_block(element, acc):
    """Append a ``<pre><code>`` rendering of a source block to `acc`.

    The lower-cased block subtype becomes the CSS class of the ``<pre>``
    element. The block text is HTML-escaped and the space indentation shared
    by all non-blank lines is removed.

    Args:
        element: Object exposing ``subtype`` and ``lines`` (the block text as
            a single string).
        acc: List of HTML fragments to append to.
    """
    # NOTE(review): a missing subtype is tolerated here; confirm whether the
    # parser can actually produce ``None``.
    css_class = html.escape((element.subtype or '').lower())
    acc.append('<pre class="{}"><code>'.format(css_class))

    lines = html.escape(element.lines).split('\n')

    # Remove indentation common to all lines. `default=0` covers blocks that
    # are empty or contain only blank lines, where there is nothing to
    # measure.
    base_indentation = min(
        (len(l) - len(l.lstrip(' ')) for l in lines if l.strip()),
        default=0,
    )

    acc.append('\n'.join(l[base_indentation:] for l in lines))
    acc.append('</code></pre>')

def render_results_block(element, acc):
    """Render nothing for results blocks; proper rendering is still a TODO."""
    # TODO:
    # acc.append('<pre class="results"><code>')
    # render_tree(element.children, acc)
    # acc.append('</code></pre>')
    return None

def render_org_text(element, acc):
    """Convert an ``org_rw.Text`` to DOM tokens and render them into `acc`."""
    tokens = org_rw.text_to_dom(element.contents, element)
    render_text_tokens(tokens, acc)

def render_text(element, acc):
    """Append the element's text tokens to `acc` inside a ``div.text``."""
    opening, closing = '<div class="text">', '</div>'
    acc.append(opening)
    render_text_tokens(element.content, acc)
    acc.append(closing)

def render_text_tokens(tokens, acc):
    """Append a paragraph rendering of `tokens` to `acc`.

    String tokens are HTML-escaped and wrapped in ``span.line``; a blank line
    inside a string closes the current ``<p>`` and opens a new one. ``Link``
    tokens become anchors, with ``id:`` targets rewritten to the matching
    ``./<id>.node.html`` page. Tokens of any other type are skipped.

    Args:
        tokens: Iterable of strings, ``Link`` objects and other tokens.
        acc: List of HTML fragments to append to.
    """
    acc.append('<p>')
    for chunk in tokens:
        if isinstance(chunk, str):
            # Escape before inserting the paragraph markup so that `<`, `>`
            # and `&` from the note text cannot be read as HTML.
            escaped = html.escape(chunk, quote=False)
            lines = escaped.replace('\n\n', '</p><p>')
            acc.append('<span class="line">{}</span>'.format(lines))
        elif isinstance(chunk, Link):
            link_target = chunk.value
            if link_target.startswith('id:'):
                link_target = './' + link_target[3:] + '.node.html'
            description = chunk.description
            if description is None:
                description = chunk.value

            acc.append('<a href="{}">{}</a>'.format(
                html.escape(link_target),
                html.escape(description),
            ))
        # else:
        #     raise NotImplementedError('TextToken: {}'.format(chunk))
    acc.append('</p>')


def render_tag(element, acc):
    """Dispatch `element` to the renderer registered for its exact type.

    Raises KeyError when the element's type has no renderer.
    """
    renderers = {
        dom.PropertyDrawerNode: render_property_drawer,
        dom.LogbookDrawerNode: render_logbook_drawer,
        dom.PropertyNode: render_property_node,
        dom.ListGroupNode: render_list_group,
        dom.ListItem: render_list_item,
        dom.TableNode: render_table,
        dom.TableSeparatorRow: render_table_separator_row,
        dom.TableRow: render_table_row,
        dom.CodeBlock: render_code_block,
        dom.Text: render_text,
        dom.ResultsDrawerNode: render_results_block,
        org_rw.Text: render_org_text,
    }
    renderer = renderers[type(element)]
    return renderer(element, acc)


def render_tree(tree, acc):
    """Render each element of `tree`, in order, into `acc`."""
    for node in tree:
        render_tag(node, acc)

def render_inline(tree, f):
    """Run renderer `f` on `tree` with a fresh accumulator and join its output."""
    fragments = []
    f(tree, fragments)
    return ''.join(fragments)


def render_as_document(headline, doc, headlineLevel, title):
    """Return the full HTML page for `headline`.

    A headline nested under another headline gets a small redirect page that
    points at its anchor inside the page of its top-level ancestor. A
    top-level headline is rendered completely through render() and
    as_document().

    Args:
        headline: Headline to produce a page for.
        doc: Document the headline belongs to.
        headlineLevel: Nesting level handed to render().
        title: Plain-text page title.

    Returns:
        The HTML document as a string.
    """
    if isinstance(headline.parent, org_rw.Headline):
        topLevelHeadline = headline.parent
        while isinstance(topLevelHeadline.parent, org_rw.Headline):
            topLevelHeadline = topLevelHeadline.parent

        # Everything interpolated below is plain text or an id, so it is
        # escaped before landing in markup or in a quoted attribute.
        target = html.escape('./{}.node.html#{}'.format(topLevelHeadline.id, headline.id))
        safe_title = html.escape(title)
        top_title = html.escape(org_rw.token_list_to_plaintext(topLevelHeadline.title.contents))
        return f"""<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>{safe_title} @ {SITE_NAME}</title>
    <meta http-equiv="refresh" content="0;{target}" />
    <link href="../static/style.css" rel="stylesheet"/>
  </head>
  <body>
    <nav>
      <h1><a href="./index.html">Código para llevar [Notes]</a></h1>
    </nav>
    <a href='{target}'>Sending you to the main note... [{top_title}]</a>
  </body>
</html>
        """
    else:
        return as_document(render(headline, doc, headlineLevel), title)

def render(headline, doc, headlineLevel):
    """Return the HTML ``div.node`` for `headline` and, recursively, its children.

    `doc` is only used to name the file in the error log when the headline's
    DOM cannot be built; the original exception is re-raised in that case.
    Headlines with `headlineLevel` above 0 get a title link that toggles
    their expanded/collapsed state.
    """
    try:
        headline_dom = headline.as_dom()
    except BaseException:
        logging.error("Error generating DOM for {}".format(doc.path))
        raise
    print_tree(headline_dom, indentation=2, headline=headline)

    content = []
    render_tree(headline_dom, content)
    content.extend(
        render(child, doc, headlineLevel=headlineLevel+1)
        for child in headline.children
    )

    state = ""
    if headline.state is not None:
        state = f'<span class="state todo-{headline.is_todo} state-{headline.state}">{headline.state}</span>'

    todo_state = "todo" if headline.is_todo else "done"

    tag_spans = [
        f'<span class="tag">{html.escape(tag)}</span>'
        for tag in headline.shallow_tags
    ]
    tags = f'<span class="tags">{"".join(tag_spans)}</span>'

    # display_state = 'collapsed'
    # if headlineLevel < MIN_HIDDEN_HEADLINE_LEVEL:
    #     display_state = 'expanded'
    display_state = 'expanded'

    title = render_inline(headline.title, render_tag)

    if headlineLevel > 0:
        title = f"<a href=\"javascript:toggle_expand('{html.escape(headline.id)}')\">{title}</a>"

    body = ''.join(content)
    return f"""
<div id="{html.escape(headline.id)}" class="node {todo_state} {display_state}">
  <h1 class="title">
    {state}
    {title}
    {tags}
  </h1>
  <div class='contents'>
    {body}
  </div>
</div>
"""


def as_document(html, title):
    """Wrap an HTML fragment in the site's full page template.

    Args:
        html: Already-rendered HTML placed verbatim inside ``<body>``.
        title: Plain-text page title; it is HTML-escaped here.

    Returns:
        The complete HTML document as a string.
    """
    # The `html` parameter hides the stdlib module of the same name inside
    # this function, so the escaping helper is imported under its own name.
    from html import escape as escape_html

    safe_title = escape_html(title)
    return f"""<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>{safe_title} @ {SITE_NAME}</title>
    <link href="../static/style.css" rel="stylesheet"/>
    <script type="text/javascript">
      function toggle_expand(header_id) {{
        var e = document.getElementById(header_id);
        if (e.classList.contains('expanded')) {{
          e.classList.add('collapsed');
          e.classList.remove('expanded');
        }}
        else {{
          e.classList.add('expanded');
          e.classList.remove('collapsed');
        }}
      }}
    </script>
  </head>
  <body>
    <nav>
      <h1><a href="./index.html">Código para llevar [Notes]</a></h1>
      <input type="text" id="searchbox" disabled="true" placeholder="Search (requires JS)" />
    </nav>
    {html}

    <script src="../static/search-box.js"></script>
    <script type="text/javascript">_codigoparallevar_enable_search_box('#searchbox', {{placeholder: 'Search...'}})</script>
  </body>
</html>
    """


def save_changes(doc):
    """Write `doc` back to the file it was loaded from.

    The document is serialised in memory first, so a failure while dumping
    cannot leave the source file truncated.

    Args:
        doc: Document to save; its ``path`` names the destination file.

    Raises:
        ValueError: If the document has no path.
    """
    import io

    if doc.path is None:
        raise ValueError("Cannot save a document without a path")

    # Opening the file with "wt" truncates it immediately, so all the
    # serialisation work is done before the file is touched.
    buffer = io.StringIO()
    dump_org(doc, buffer)

    with open(doc.path, "wt") as f:
        f.write(buffer.getvalue())


if __name__ == "__main__":
    # Script entry point: expects exactly SOURCE_TOP and DEST_TOP.
    if len(sys.argv) != 3:
        # A wrong invocation is an error: report it on stderr and exit with
        # a non-zero status so calling scripts can detect it.
        print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    # sys.exit is used rather than the `exit` helper, which is only
    # installed by the `site` module.
    sys.exit(main(sys.argv[1], sys.argv[2]))