WIP: Save blog & notes data on FTS search DB.
This commit is contained in:
parent
b8eadc8b1e
commit
6d621ffc3c
@ -22,6 +22,7 @@ import shutil
|
||||
import traceback
|
||||
import time
|
||||
import re
|
||||
import sqlite3
|
||||
from typing import List
|
||||
|
||||
from bs4 import BeautifulSoup as bs4
|
||||
@ -63,6 +64,7 @@ JINJA_ENV = jinja2.Environment(
|
||||
autoescape=jinja2.select_autoescape()
|
||||
)
|
||||
|
||||
PARSER_NAMESPACE = 'codigoparallevar.com/blog'
|
||||
WATCH = True
|
||||
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
|
||||
WATCH = False
|
||||
@ -176,6 +178,12 @@ def get_out_path(front_matter):
|
||||
return out_path
|
||||
|
||||
|
||||
def create_db(path):
    """Open (or create) the SQLite database used for full-text search.

    Ensures the fts5 virtual table exists, then removes any rows that an
    earlier run of *this* generator indexed (rows are tagged with
    PARSER_NAMESPACE so several site generators can share one DB).

    Returns the open sqlite3 connection.
    """
    connection = sqlite3.connect(path)
    # One row per searchable document; trigram tokenizer enables
    # substring-style matching.
    connection.execute('CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url, tokenize="trigram");')
    # Only clear rows belonging to this namespace — other generators'
    # entries in the shared DB are left untouched.
    connection.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
    return connection
|
||||
|
||||
def load_all(top_dir_relative):
|
||||
top = os.path.abspath(top_dir_relative)
|
||||
|
||||
@ -456,10 +464,39 @@ def render_rss(docs, dest_top):
|
||||
f.write(result)
|
||||
|
||||
|
||||
def regen_all(source_top, dest_top, docs=None):
|
||||
def regen_all(source_top, dest_top, docs=None, db=None):
|
||||
if docs is None:
|
||||
docs = load_all(source_top)
|
||||
|
||||
cur = db.cursor()
|
||||
cleaned_db = False
|
||||
|
||||
try:
|
||||
cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
|
||||
cleaned_db = True
|
||||
except sqlite3.OperationalError as err:
|
||||
if WATCH:
|
||||
logging.warning("Error pre-cleaning DB, search won't be updated")
|
||||
else:
|
||||
raise
|
||||
|
||||
# Save posts to DB
|
||||
for (doc, front_matter, out_path) in docs.values():
|
||||
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
|
||||
(
|
||||
out_path,
|
||||
front_matter['title'],
|
||||
doc,
|
||||
front_matter['title'],
|
||||
False,
|
||||
False,
|
||||
PARSER_NAMESPACE,
|
||||
out_path + '/index.html',
|
||||
))
|
||||
|
||||
cur.close()
|
||||
db.commit()
|
||||
|
||||
# Render posts
|
||||
for (doc, front_matter, out_path) in docs.values():
|
||||
doc_full_path = os.path.join(dest_top, out_path)
|
||||
@ -513,7 +550,8 @@ def main(source_top, dest_top):
|
||||
## Initial load
|
||||
t0 = time.time()
|
||||
logging.info("Initial load...")
|
||||
docs = regen_all(source_top, dest_top)
|
||||
db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
|
||||
docs = regen_all(source_top, dest_top, db=db)
|
||||
logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
|
||||
|
||||
if not WATCH:
|
||||
@ -557,7 +595,7 @@ def main(source_top, dest_top):
|
||||
if is_static_resource:
|
||||
logging.info("Updated static resources in {:.2f}s".format(time.time() - t0))
|
||||
else:
|
||||
docs = regen_all(source_top, dest_top, docs)
|
||||
docs = regen_all(source_top, dest_top, docs, db=db)
|
||||
logging.info("Updated all in {:.2f}s".format(time.time() - t0))
|
||||
|
||||
else:
|
||||
|
@ -46,6 +46,7 @@ IMG_EXTENSIONS = set([
|
||||
"gif",
|
||||
])
|
||||
SKIPPED_TAGS = set(['attach'])
|
||||
PARSER_NAMESPACE = 'codigoparallevar.com/notes'
|
||||
|
||||
WATCH = True
|
||||
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
|
||||
@ -88,11 +89,9 @@ def is_git_path(path):
|
||||
return any([chunk == ".git" for chunk in path.split(os.sep)])
|
||||
|
||||
def create_db(path):
    """Open (or create) the shared SQLite full-text-search database.

    The table is created with IF NOT EXISTS instead of unlinking the DB
    file (the old behavior) so multiple generators can share one
    database; each generator only clears the rows tagged with its own
    PARSER_NAMESPACE.

    Returns the open sqlite3 connection.
    """
    db = sqlite3.connect(path)
    # FIX: the diff introduced `..., url tokenize="trigram"` — missing
    # comma between the last column and the tokenize option, which makes
    # this CREATE statement raise sqlite3.OperationalError. Restored the
    # comma to match the (correct) statement in the blog generator.
    db.execute('CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url, tokenize="trigram");')
    # Drop only this generator's previously-indexed rows.
    db.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
    return db
|
||||
|
||||
def load_all(top_dir_relative):
|
||||
@ -126,7 +125,7 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
|
||||
cleaned_db = False
|
||||
|
||||
try:
|
||||
cur.execute('DELETE FROM note_search;')
|
||||
cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
|
||||
cleaned_db = True
|
||||
except sqlite3.OperationalError as err:
|
||||
if WATCH:
|
||||
@ -262,7 +261,7 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
|
||||
topLevelHeadline = topLevelHeadline.parent
|
||||
|
||||
# Save for full-text-search
|
||||
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?);''',
|
||||
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
|
||||
(
|
||||
headline.id,
|
||||
headline.title.get_text(),
|
||||
@ -270,6 +269,8 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
|
||||
topLevelHeadline.title.get_text(),
|
||||
headline.is_done,
|
||||
headline.is_todo,
|
||||
PARSER_NAMESPACE,
|
||||
headline.id + '.node.html',
|
||||
))
|
||||
|
||||
# Update graph, replace document ids with headline ids
|
||||
@ -356,7 +357,7 @@ def main(src_top, dest_top):
|
||||
t0 = time.time()
|
||||
|
||||
os.makedirs(dest_top, exist_ok=True)
|
||||
db = create_db(os.path.join(dest_top, 'db.sqlite3'))
|
||||
db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
|
||||
docs = regen_all(src_top, dest_top, db=db)
|
||||
|
||||
if not WATCH:
|
||||
|
Loading…
Reference in New Issue
Block a user