diff --git a/scripts/blog.py b/scripts/blog.py
index d177201..d2d6d83 100644
--- a/scripts/blog.py
+++ b/scripts/blog.py
@@ -22,6 +22,7 @@ import shutil
 import traceback
 import time
 import re
+import sqlite3
 
 from typing import List
 from bs4 import BeautifulSoup as bs4
@@ -63,6 +64,7 @@ JINJA_ENV = jinja2.Environment(
     autoescape=jinja2.select_autoescape()
 )
 
+PARSER_NAMESPACE = 'codigoparallevar.com/blog'
 WATCH = True
 if os.getenv('WATCH_AND_REBUILD', '1') == '0':
     WATCH = False
@@ -176,6 +178,12 @@ def get_out_path(front_matter):
 
     return out_path
 
+def create_db(path):
+    db = sqlite3.connect(path)
+    db.execute('CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url, tokenize="trigram");')
+    db.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
+    return db
+
 def load_all(top_dir_relative):
     top = os.path.abspath(top_dir_relative)
 
@@ -456,10 +464,39 @@ def render_rss(docs, dest_top):
         f.write(result)
 
 
-def regen_all(source_top, dest_top, docs=None):
+def regen_all(source_top, dest_top, docs=None, db=None):
     if docs is None:
         docs = load_all(source_top)
 
+    cur = db.cursor()
+    cleaned_db = False
+
+    try:
+        cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
+        cleaned_db = True
+    except sqlite3.OperationalError as err:
+        if WATCH:
+            logging.warning("Error pre-cleaning DB, search won't be updated")
+        else:
+            raise
+
+    # Save posts to DB
+    for (doc, front_matter, out_path) in docs.values():
+        cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
+                    (
+                        out_path,
+                        front_matter['title'],
+                        doc,
+                        front_matter['title'],
+                        False,
+                        False,
+                        PARSER_NAMESPACE,
+                        out_path + '/index.html',
+                    ))
+
+    cur.close()
+    db.commit()
+
     # Render posts
     for (doc, front_matter, out_path) in docs.values():
         doc_full_path = os.path.join(dest_top, out_path)
@@ -513,7 +550,8 @@ def main(source_top, dest_top):
     ## Initial load
     t0 = time.time()
     logging.info("Initial load...")
-    docs = regen_all(source_top, dest_top)
+    db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
+    docs = regen_all(source_top, dest_top, db=db)
     logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
 
     if not WATCH:
@@ -557,7 +595,7 @@ def main(source_top, dest_top):
                 if is_static_resource:
                     logging.info("Updated static resources in {:.2f}s".format(time.time() - t0))
                 else:
-                    docs = regen_all(source_top, dest_top, docs)
+                    docs = regen_all(source_top, dest_top, docs, db=db)
                     logging.info("Updated all in {:.2f}s".format(time.time() - t0))
 
             else:
diff --git a/scripts/generate.py b/scripts/generate.py
index cc18e2a..4ceaf8c 100644
--- a/scripts/generate.py
+++ b/scripts/generate.py
@@ -46,6 +46,7 @@ IMG_EXTENSIONS = set([
     "gif",
 ])
 SKIPPED_TAGS = set(['attach'])
+PARSER_NAMESPACE = 'codigoparallevar.com/notes'
 
 WATCH = True
 if os.getenv('WATCH_AND_REBUILD', '1') == '0':
@@ -88,11 +89,9 @@ def is_git_path(path):
     return any([chunk == ".git" for chunk in path.split(os.sep)])
 
 def create_db(path):
-    if os.path.exists(path):
-        os.unlink(path)
-
     db = sqlite3.connect(path)
-    db.execute('CREATE VIRTUAL TABLE note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, tokenize="trigram");')
+    db.execute('CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url, tokenize="trigram");')
+    db.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
     return db
 
 def load_all(top_dir_relative):
@@ -126,7 +125,7 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
     cleaned_db = False
 
     try:
-        cur.execute('DELETE FROM note_search;')
+        cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
         cleaned_db = True
     except sqlite3.OperationalError as err:
         if WATCH:
@@ -262,7 +261,7 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
             topLevelHeadline = topLevelHeadline.parent
 
         # Save for full-text-search
-        cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?);''',
+        cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
                     (
                         headline.id,
                         headline.title.get_text(),
@@ -270,6 +269,8 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
                         topLevelHeadline.title.get_text(),
                         headline.is_done,
                         headline.is_todo,
+                        PARSER_NAMESPACE,
+                        headline.id + '.node.html',
                     ))
 
         # Update graph, replace document ids with headline ids
@@ -356,7 +357,7 @@ def main(src_top, dest_top):
     t0 = time.time()
 
     os.makedirs(dest_top, exist_ok=True)
-    db = create_db(os.path.join(dest_top, 'db.sqlite3'))
+    db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
     docs = regen_all(src_top, dest_top, db=db)
 
     if not WATCH:
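With this patch both generators write into one shared FTS5 index, with rows keyed by parser_namespace. A minimal sketch of how that index might be queried, assuming the shared db.sqlite3 path and a sample search term (neither is part of the patch):

    import sqlite3

    db = sqlite3.connect('db.sqlite3')
    # A column filter can be combined with the MATCH clause; the trigram
    # tokenizer needs search terms of at least three characters.
    rows = db.execute(
        '''SELECT title, url, parser_namespace
           FROM note_search
           WHERE note_search MATCH ? AND parser_namespace = ?
           ORDER BY rank
           LIMIT 10;''',
        ('sqlite', 'codigoparallevar.com/blog'),
    ).fetchall()
    for title, url, namespace in rows:
        print(f'{namespace}: {title} -> {url}')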