new-codigoparallevar/scripts/generate.py

552 lines
17 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import sqlite3
import time
import json
2021-08-26 22:22:48 +00:00
import html
import logging
import os
import sys
import uuid
from datetime import datetime
import traceback
import inotify.adapters
2022-05-07 21:03:26 +00:00
import org_rw
2022-05-06 13:58:28 +00:00
from org_rw import OrgTime, dom, Link
from org_rw import dump as dump_org
from org_rw import load as load_org
2021-08-26 22:22:48 +00:00
from org_rw import token_list_to_raw
# File suffixes associated with org documents.
# NOTE(review): not referenced by the code visible in this file — confirm usage.
EXTENSIONS = [".org", ".org.txt"]

# Watch-and-rebuild mode stays enabled unless WATCH_AND_REBUILD is exactly '0'.
WATCH = os.getenv('WATCH_AND_REBUILD', '1') != '0'

# Only referenced from commented-out collapsing logic in render().
MIN_HIDDEN_HEADLINE_LEVEL = 2

# The headline with this id is additionally written out as index.html.
INDEX_ID = "ea48ec1d-f9d4-4fb7-b39a-faa7b6e2ba95"

# Suffix used in the <title> of every generated page.
SITE_NAME = "Código para llevar"

# inotify event type names that trigger a full rebuild in main().
MONITORED_EVENT_TYPES = (
    'IN_CREATE',
    # 'IN_MODIFY',
    'IN_CLOSE_WRITE',
    'IN_DELETE',
    'IN_MOVED_FROM',
    'IN_MOVED_TO',
    'IN_DELETE_SELF',
    'IN_MOVE_SELF',
)

# Parent of the directory that contains this script.
ROOT_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)
STATIC_PATH = os.path.join(ROOT_DIR, 'static')
def is_git_path(path):
    """Return True when one of the ``os.sep``-separated parts of *path* is ".git"."""
    components = path.split(os.sep)
    return ".git" in components
def create_db(path):
    """Create a fresh SQLite database holding the ``note_search`` FTS5 table.

    Any existing file at *path* is removed first, so the returned connection
    always points at an empty index.

    Args:
        path: Filesystem location of the database file.

    Returns:
        An open ``sqlite3`` connection to the new database.
    """
    # main() calls this before regen_all() has created the destination
    # directory, so make sure the parent exists or connect() would fail.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # EAFP: removing directly avoids the exists()/unlink() race.
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass

    db = sqlite3.connect(path)
    db.execute('CREATE VIRTUAL TABLE note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo);')
    return db
def load_all(top_dir_relative):
    """Parse every org file found below *top_dir_relative*.

    A file is considered when its name contains ".org" anywhere.  If any file
    fails to parse, the traceback and the offending path are printed and the
    process exits with status 1 (via ``sys.exit``).

    Args:
        top_dir_relative: Directory to walk; it is made absolute first.

    Returns:
        List of documents as returned by ``load_org``.
    """
    top = os.path.abspath(top_dir_relative)

    docs = []
    for root, _dirs, files in os.walk(top):
        for name in files:
            # Substring match, kept as-is: also accepts names like "x.org.txt".
            if ".org" not in name:
                continue

            path = os.path.join(root, name)
            try:
                # Context manager so the handle is closed even if parsing fails.
                with open(path) as f:
                    doc = load_org(f, extra_cautious=True)
                docs.append(doc)
            except Exception:
                traceback.print_exc()
                print(f"== On {path}")
                sys.exit(1)

    logging.info("Collected {} files".format(len(docs)))
    return docs
def regen_all(src_top, dest_top, *, docs=None, db=None):
    """Rebuild every generated artifact from the org documents under *src_top*.

    Steps, in order:
      * empty the ``note_search`` table and reload all documents from disk;
      * give every headline without an id a fresh UUID and save those
        documents back to disk;
      * for documents whose path relative to *src_top* starts with "public/",
        write one ``<id>.node.html`` per headline (plus one for the document id
        and ``index.html`` for the headline matching ``INDEX_ID``), and index
        each headline for full-text search;
      * write ``graph.json`` and ``graph.html`` describing links between notes.

    Args:
        src_top: Root directory of the org sources.
        dest_top: Output directory; created if missing.
        docs: Accepted for compatibility but ignored — documents are always
            re-read from disk.
        db: Open sqlite3 connection containing the ``note_search`` table.

    Returns:
        The list of documents that were loaded.

    Raises:
        ValueError: If *db* is not provided.
    """
    if db is None:
        raise ValueError("regen_all() needs an open sqlite3 connection in `db`")

    # Link prefixes that are intentionally left out of the graph.
    ignored_link_prefixes = (
        'http://', 'https://',     # external URL, ignored for now
        'attachment:', 'file:',    # attachment
        'notmuch:', 'orgit-rev:',  # mail
        '*',                       # internal
        './',                      # TODO: Properly handle
    )

    def render_page(headline, doc):
        # Full HTML page for one headline, titled with its plain-text title.
        return render_as_document(
            headline, doc, headlineLevel=0,
            title=org_rw.token_list_to_plaintext(headline.title.contents))

    def write_page(path, page):
        # Pages declare <meta charset="utf-8">, so write them as UTF-8
        # regardless of the locale's default encoding.
        with open(path, "wt", encoding="utf-8") as f:
            f.write(page)

    files_generated = 0
    cur = db.cursor()
    cur.execute('DELETE FROM note_search;')

    docs = load_all(src_top)
    doc_to_headline_remapping = {}

    os.makedirs(dest_top, exist_ok=True)
    graph = {}

    for doc in docs:
        relpath = os.path.relpath(doc.path, src_top)
        changed = False
        headlines = list(doc.getAllHeadlines())
        related = None

        # Walk backwards so the top-level "related" headline can be popped
        # without disturbing the indices still to be visited.
        for index in range(len(headlines) - 1, -1, -1):
            headline = headlines[index]
            if headline.title.get_text().strip().lower() == "related" and headline.depth == 1:
                if related is not None:
                    print(
                        "Found duplicated related: {} vs {}".format(
                            related.id, headline.id
                        )
                    )
                assert related is None
                related = headline
                headlines.pop(index)

        for headline in headlines:
            if headline.id is None:
                headline.id = str(uuid.uuid4())
                changed = True

        if changed:
            print("Updated", relpath)
            save_changes(doc)

        if not relpath.startswith("public/"):
            # print("Skip:", relpath)
            continue

        main_headline = None
        topHeadlines = doc.getTopHeadlines()

        if ((len(topHeadlines) == 1 and related is None)
                or (len(topHeadlines) == 2 and related is not None)):
            # Exactly one real top-level headline: it stands for the document.
            main_headline = [h for h in topHeadlines if h != related][0]
            if doc.id is not None:
                doc_to_headline_remapping['id:' + doc.id] = 'id:' + main_headline.id
                # Render before opening the target so a rendering failure
                # cannot leave a truncated page behind.
                page = render_page(main_headline, doc)
                write_page(os.path.join(dest_top, doc.id + ".node.html"), page)
                files_generated += 1
        elif doc.id is not None:
            logging.error("Cannot render document from id: {}. {} headlines {} related".format(
                relpath,
                len(topHeadlines),
                'with' if related is not None else 'without'
            ))

        for headline in headlines:
            endpath = os.path.join(dest_top, headline.id + ".node.html")

            links = []
            headline_links = list(headline.get_links())
            if headline == main_headline and related is not None:
                # Links under "related" are attributed to the main headline.
                headline_links.extend(list(related.get_links()))

            for l in headline_links:
                value = l.value
                tokens = value.split()
                if value.startswith('id:'):
                    links.append({'target': value})
                elif value.startswith(ignored_link_prefixes):
                    pass
                elif not tokens or ':' not in tokens[0]:
                    pass  # Ignore, internal (also covers an empty link value)
                else:
                    logging.warning('On document {}, unknown link to {}'.format(doc.path, value))

            if headline.parent and isinstance(headline.parent, org_rw.Headline):
                # NOTE(review): unlike the links above, this target has no
                # 'id:' prefix — confirm that graph consumers expect that.
                links.append({
                    "target": headline.parent.id,
                    "relation": "in"
                })
            graph[headline.id] = {
                "title": org_rw.token_list_to_plaintext(headline.title.contents).strip(),
                "links": links,
                "depth": headline.depth,
            }

            topLevelHeadline = headline
            while isinstance(topLevelHeadline.parent, org_rw.Headline):
                topLevelHeadline = topLevelHeadline.parent

            # Save for full-text-search
            cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?);''',
                        (
                            headline.id,
                            headline.title.get_text(),
                            ''.join(headline.get_contents('raw')),
                            topLevelHeadline.title.get_text(),
                            headline.is_done,
                            headline.is_todo,
                        ))

            # Render HTML once; the index page reuses the same markup.
            page = render_page(headline, doc)
            write_page(endpath, page)
            files_generated += 1

            if headline.id == INDEX_ID:
                write_page(os.path.join(dest_top, "index.html"), page)
                files_generated += 1

    # Update graph, replace document ids with headline ids
    for headline_data in graph.values():
        for link in headline_data['links']:
            if link['target'] in doc_to_headline_remapping:
                link['target'] = doc_to_headline_remapping[link['target']]

    # Output graph files
    graphpath = os.path.join(dest_top, "graph.json")
    graph_explorer_path = os.path.join(dest_top, "graph.html")
    with open(graphpath, "wt") as f:
        json.dump(obj=graph, fp=f, indent=2)

    template_path = os.path.join(
        os.path.dirname(os.path.abspath(dest_top)), '..', 'static', 'graph_explorer.html')
    with open(template_path, 'rt', encoding="utf-8") as template:
        source = template.read()
    with open(graph_explorer_path, 'wt', encoding="utf-8") as f:
        f.write(source.replace('<!-- REPLACE_THIS_WITH_GRAPH -->',
                               json.dumps(graph)))
    logging.info("Generated {} files".format(files_generated))

    cur.close()
    db.commit()

    # main() stores the result of this call, so hand back what was loaded.
    return docs
2021-08-26 22:22:48 +00:00
def main(src_top, dest_top):
    """Build the site once and, in watch mode, rebuild on every change.

    Args:
        src_top: Root directory of the org sources.
        dest_top: Output directory for the generated site and database.

    Returns:
        0 after a one-shot build (``WATCH`` is false).  In watch mode the
        function loops over inotify events and normally does not return.
    """
    # Only set up inotify watches when they will be consumed.  In watch mode
    # the watcher is still created before the first build, as before.
    notifier = None
    if WATCH:
        notifier = inotify.adapters.InotifyTrees([src_top, STATIC_PATH])

    ## Initial load
    t0 = time.time()

    db = create_db(os.path.join(dest_top, 'db.sqlite3'))
    docs = regen_all(src_top, dest_top, db=db)

    if not WATCH:
        logging.info("Build completed in {:.2f}s".format(time.time() - t0))
        return 0

    logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))

    ## Updating
    for event in notifier.event_gen(yield_nones=False):
        (_ev, types, directory, file) = event
        if not any(event_type in MONITORED_EVENT_TYPES for event_type in types):
            continue
        if is_git_path(directory):
            continue
        filepath = os.path.join(directory, file)
        print("CHANGED: {}".format(filepath))
        t0 = time.time()
        try:
            docs = regen_all(src_top, dest_top, docs=docs, db=db)
        except KeyboardInterrupt:
            # Let Ctrl-C stop the watcher even in the middle of a rebuild.
            raise
        except (Exception, SystemExit):
            # SystemExit is included on purpose: load_all() reports an
            # unparsable file through sys.exit(), which must not kill the
            # watcher.
            logging.error(traceback.format_exc())
            logging.error("Loading new templates failed 😿")
            continue
        logging.info("Updated all in {:.2f}s".format(time.time() - t0))
def print_tree(tree, indentation=0, headline=None):
    """Debug helper that would dump a DOM tree; currently switched off.

    The function returns immediately, so nothing below the guard runs.
    """
    # if headline and headline.id != INDEX_ID:
    #     return
    dump_enabled = False
    if not dump_enabled:
        return

    for node in tree:
        attributes = dir(node)
        if "children" in attributes:
            if len(node.children) > 0:
                print_element(node.children, indentation + 1, headline)
                print()
        elif "content" in attributes:
            for piece in node.content:
                print_element(piece, indentation + 1, headline)
def print_element(element, indentation, headline):
    """Print one debug line for a link or string, or descend via print_tree()."""
    if isinstance(element, org_rw.Link):
        print(" " * indentation, "Link:", element.get_raw())
        return

    if isinstance(element, str):
        print(" " * indentation, "{" + element + "}", type(element))
        return

    # Anything else is handled as a nested tree.
    print_tree(element, indentation, headline)
2021-08-26 22:22:48 +00:00
def render_property_drawer(element, acc):
    """Property drawers produce no HTML; *acc* is left untouched."""
    return None
def render_logbook_drawer(element, acc):
    """Logbook drawers produce no HTML; *acc* is left untouched."""
    return None
def render_property_node(element, acc):
    """Property nodes produce no HTML; *acc* is left untouched."""
    return None
def render_list_group(element, acc):
    """Append the element's rendered children to *acc*, wrapped in <ul>...</ul>."""
    opening, closing = "<ul>", "</ul>"
    acc.append(opening)
    render_tree(element.children, acc)
    acc.append(closing)
2022-09-27 22:04:06 +00:00
def render_table(element, acc):
    """Append the element's rendered children to *acc*, wrapped in <table>...</table>."""
    opening, closing = "<table>", "</table>"
    acc.append(opening)
    render_tree(element.children, acc)
    acc.append(closing)
def render_table_row(element, acc):
    """Append one <tr> to *acc* with an HTML-escaped <td> per entry of ``element.cells``."""
    acc.append("<tr>")
    for cell_text in element.cells:
        acc.extend(("<td>", html.escape(cell_text), "</td>"))
    acc.append("</tr>")
def render_table_separator_row(element, acc):
    """Append an empty marker row for a table separator line to *acc*."""
    separator_markup = "<tr class='__table-separator'></tr>"
    acc.append(separator_markup)
2021-08-26 22:22:48 +00:00
def render_list_item(element, acc):
    """Append an <li> for *element* to *acc*.

    When the item has a tag it is rendered first inside ``<span class='tag'>``;
    the item content always follows inside ``<span class='item'>``.
    """
    acc.append("<li>")

    item_tag = element.tag
    if item_tag is not None:
        acc.append("<span class='tag'>")
        render_text_tokens(item_tag, acc)
        acc.append("</span>")

    acc.append("<span class='item'>")
    render_text_tokens(element.content, acc)
    acc.append("</span></li>")
def render_code_block(element, acc):
    """Append a ``<pre><code>`` block for a source block to *acc*.

    The lower-cased ``element.subtype`` becomes the CSS class of the ``<pre>``.
    The code is HTML-escaped and the indentation shared by all non-blank lines
    is removed.

    Args:
        element: Object exposing ``subtype`` (string or None) and ``lines``
            (the code as a single string).
        acc: List of HTML fragments, appended to in place.
    """
    # A block may come without a language; render an empty class then.
    language = (element.subtype or "").lower()
    acc.append('<pre class="{}"><code>'.format(html.escape(language)))

    escaped_lines = html.escape(element.lines or "").split('\n')

    # Remove indentation common to all lines.  ``default=0`` covers blocks
    # that are empty or contain only blank lines.
    base_indentation = min(
        (
            len(line) - len(line.lstrip(' '))
            for line in escaped_lines
            if line.strip()
        ),
        default=0,
    )

    acc.append('\n'.join(line[base_indentation:] for line in escaped_lines))
    acc.append('</code></pre>')
2021-08-26 22:22:48 +00:00
2022-05-16 21:28:59 +00:00
def render_results_block(element, acc):
    """Results drawers are not rendered yet; *acc* is left untouched."""
    # TODO:
    # acc.append('<pre class="results"><code>')
    # render_tree(element.children, acc)
    # acc.append('</code></pre>')
    return None
2022-08-28 12:09:57 +00:00
def render_org_text(element, acc):
    """Convert the element's contents with ``org_rw.text_to_dom`` and render the resulting tokens into *acc*."""
    render_text_tokens(org_rw.text_to_dom(element.contents, element), acc)
2021-08-26 22:22:48 +00:00
def render_text(element, acc):
    """Render ``element.content`` into *acc* inside ``<div class="text">``."""
    opening, closing = '<div class="text">', '</div>'
    acc.append(opening)
    render_text_tokens(element.content, acc)
    acc.append(closing)
2022-05-06 18:19:11 +00:00
def render_text_tokens(tokens, acc):
    """Render a sequence of text tokens as HTML paragraphs appended to *acc*.

    Strings are HTML-escaped and wrapped in ``<span class="line">``; a blank
    line (two consecutive newlines) starts a new paragraph.  ``Link`` tokens
    become ``<a>`` elements, with ``id:`` targets rewritten to the matching
    ``./<id>.node.html`` page.  Tokens of any other type are skipped.

    Args:
        tokens: Iterable of strings, ``Link`` objects and other tokens.
        acc: List of HTML fragments, appended to in place.
    """
    # Close the open span before closing the paragraph so the tags nest
    # correctly, then reopen both for the text that follows.
    paragraph_break = '</span></p><p><span class="line">'

    acc.append('<p>')
    for chunk in tokens:
        if isinstance(chunk, str):
            # Note text is raw user content: escape it so characters such as
            # '<' and '&' cannot break, or inject into, the page markup.
            text = html.escape(chunk, quote=False)
            acc.append('<span class="line">{}</span>'.format(
                text.replace('\n\n', paragraph_break)))
        elif isinstance(chunk, Link):
            link_target = chunk.value
            if link_target.startswith('id:'):
                link_target = './' + link_target[3:] + '.node.html'
            description = chunk.description
            if description is None:
                # No explicit description: show the raw link value.
                description = chunk.value

            acc.append('<a href="{}">{}</a>'.format(
                html.escape(link_target),
                html.escape(description),
            ))
        # else:
        #     raise NotImplementedError('TextToken: {}'.format(chunk))
    acc.append('</p>')
2021-08-26 22:22:48 +00:00
def render_tag(element, acc):
    """Dispatch *element* to the renderer registered for its exact type.

    Raises ``KeyError`` when the element's type has no entry in the table.
    """
    renderers = {
        dom.PropertyDrawerNode: render_property_drawer,
        dom.LogbookDrawerNode: render_logbook_drawer,
        dom.PropertyNode: render_property_node,
        dom.ListGroupNode: render_list_group,
        dom.ListItem: render_list_item,
        dom.TableNode: render_table,
        dom.TableSeparatorRow: render_table_separator_row,
        dom.TableRow: render_table_row,
        dom.CodeBlock: render_code_block,
        dom.Text: render_text,
        dom.ResultsDrawerNode: render_results_block,
        org_rw.Text: render_org_text,
    }
    handler = renderers[type(element)]
    return handler(element, acc)
def render_tree(tree, acc):
    """Render each element of *tree*, in order, into *acc* via render_tag()."""
    for node in tree:
        render_tag(node, acc)
2022-08-28 12:09:57 +00:00
def render_inline(tree, f):
    """Call ``f(tree, fragments)`` on a fresh list and return the fragments joined into one string."""
    fragments = []
    f(tree, fragments)
    return ''.join(fragments)
2021-08-26 22:22:48 +00:00
2022-10-18 20:47:04 +00:00
def render_as_document(headline, doc, headlineLevel, title):
    """Return the complete HTML page for *headline*.

    A headline nested under another headline gets a small redirect page that
    points at its anchor inside the page of its top-level ancestor.  Any other
    headline is rendered in full and wrapped by ``as_document``.

    Args:
        headline: Headline to produce a page for.
        doc: Document the headline belongs to (passed through to ``render``).
        headlineLevel: Nesting level passed through to ``render``.
        title: Plain-text page title.

    Returns:
        The page as a string.
    """
    if not isinstance(headline.parent, org_rw.Headline):
        return as_document(render(headline, doc, headlineLevel), title)

    topLevelHeadline = headline.parent
    while isinstance(topLevelHeadline.parent, org_rw.Headline):
        topLevelHeadline = topLevelHeadline.parent

    # Everything interpolated below comes from note content, so escape it
    # before placing it in markup or attribute values.
    target = html.escape('./{}.node.html#{}'.format(topLevelHeadline.id, headline.id))
    safe_title = html.escape(title)
    top_title = html.escape(
        org_rw.token_list_to_plaintext(topLevelHeadline.title.contents))

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title} @ {SITE_NAME}</title>
<meta http-equiv="refresh" content="0;{target}" />
<link href="../static/style.css" rel="stylesheet"/>
</head>
<body>
<nav>
<h1><a href="./index.html">Código para llevar [Notes]</a></h1>
</nav>
<a href='{target}'>Sending you to the main note... [{top_title}]</a>
</body>
</html>
"""
def render(headline, doc, headlineLevel):
    """Return the HTML fragment for *headline* and, recursively, its children.

    The fragment is a ``<div class="node ...">`` holding the title line
    (state, title, tags) followed by the rendered body and child headlines.
    Below the top level (``headlineLevel > 0``) the title is a link that calls
    ``toggle_expand`` for the node.  A failure in ``headline.as_dom()`` is
    logged with the document path and re-raised.
    """
    try:
        dom_tree = headline.as_dom()
    except:
        logging.error("Error generating DOM for {}".format(doc.path))
        raise
    print_tree(dom_tree, indentation=2, headline=headline)

    # Own body first, then every child headline one level deeper.
    content = []
    render_tree(dom_tree, content)
    content.extend(
        render(child, doc, headlineLevel=headlineLevel+1)
        for child in headline.children
    )

    state = ""
    if headline.state is not None:
        state = f'<span class="state todo-{headline.is_todo} state-{headline.state}">{headline.state}</span>'

    todo_state = "todo" if headline.is_todo else "done"

    tag_markup = "".join(
        f'<span class="tag">{html.escape(tag)}</span>'
        for tag in headline.shallow_tags
    )
    tags = f'<span class="tags">{tag_markup}</span>'

    # display_state = 'collapsed'
    # if headlineLevel < MIN_HIDDEN_HEADLINE_LEVEL:
    #     display_state = 'expanded'
    display_state = 'expanded'

    title = render_inline(headline.title, render_tag)
    if headlineLevel > 0:
        title = f"<a href=\"javascript:toggle_expand('{html.escape(headline.id)}')\">{title}</a>"

    return f"""
<div id="{html.escape(headline.id)}" class="node {todo_state} {display_state}">
<h1 class="title">
{state}
{title}
{tags}
</h1>
<div class='contents'>
{''.join(content)}
</div>
</div>
"""
2022-08-20 16:21:26 +00:00
def as_document(html, title):
    """Wrap an already-rendered HTML fragment in the full page template.

    Args:
        html: Body markup, inserted verbatim.
        title: Plain-text page title; escaped before it is placed in ``<title>``.

    Returns:
        The complete HTML document as a string.
    """
    # The ``html`` parameter shadows the stdlib module inside this function,
    # so bring the escape helper in under its own name.
    from html import escape

    safe_title = escape(title)
    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{safe_title} @ {SITE_NAME}</title>
<link href="../static/style.css" rel="stylesheet"/>
<script type="text/javascript">
function toggle_expand(header_id) {{
    var e = document.getElementById(header_id);
    if (e.classList.contains('expanded')) {{
        e.classList.add('collapsed');
        e.classList.remove('expanded');
    }}
    else {{
        e.classList.add('expanded');
        e.classList.remove('collapsed');
    }}
}}
</script>
</head>
<body>
<nav>
<h1><a href="./index.html">Código para llevar [Notes]</a></h1>
<input type="text" id="searchbox" disabled="true" placeholder="Search (requires JS)" />
</nav>
{html}
<script src="../static/search-box.js"></script>
<script type="text/javascript">_codigoparallevar_enable_search_box('#searchbox', {{placeholder: 'Search...'}})</script>
</body>
</html>
"""
def save_changes(doc):
    """Write *doc* back to the file it was loaded from (``doc.path``).

    The document is serialized into memory first.  Opening the file for
    writing truncates it, so serializing straight into it would destroy the
    note if ``dump_org`` failed part-way through.
    """
    import io

    assert doc.path is not None

    buffer = io.StringIO()
    dump_org(doc, buffer)

    with open(doc.path, "wt") as f:
        f.write(buffer.getvalue())
if __name__ == "__main__":
    # Script entry point: expects exactly SOURCE_TOP and DEST_TOP.
    if len(sys.argv) != 3:
        # A usage error is a failure: report it on stderr with a non-zero
        # status so calling scripts can detect it.
        print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    # sys.exit rather than the ``exit`` helper, which is only installed by the
    # ``site`` module.
    sys.exit(main(sys.argv[1], sys.argv[2]))