WIP: Save blog & notes data on FTS search DB.
This commit is contained in:
parent
b8eadc8b1e
commit
6d621ffc3c
@ -22,6 +22,7 @@ import shutil
|
|||||||
import traceback
|
import traceback
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
import sqlite3
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from bs4 import BeautifulSoup as bs4
|
from bs4 import BeautifulSoup as bs4
|
||||||
@ -63,6 +64,7 @@ JINJA_ENV = jinja2.Environment(
|
|||||||
autoescape=jinja2.select_autoescape()
|
autoescape=jinja2.select_autoescape()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
PARSER_NAMESPACE = 'codigoparallevar.com/blog'
|
||||||
WATCH = True
|
WATCH = True
|
||||||
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
|
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
|
||||||
WATCH = False
|
WATCH = False
|
||||||
@ -176,6 +178,12 @@ def get_out_path(front_matter):
|
|||||||
return out_path
|
return out_path
|
||||||
|
|
||||||
|
|
||||||
|
def create_db(path, namespace=None):
    """Open the full-text-search database and clear one namespace's rows.

    Connects to the SQLite file at *path*, creates the ``note_search``
    FTS5 virtual table (trigram tokenizer) when it does not exist yet, and
    deletes every row whose ``parser_namespace`` column equals *namespace*.
    Rows that belong to other namespaces are left untouched.

    Args:
        path: Filesystem path of the SQLite database file.
        namespace: Value of ``parser_namespace`` whose rows are removed.
            Defaults to this module's ``PARSER_NAMESPACE``.

    Returns:
        The open ``sqlite3.Connection``. The deletion is not committed
        here; it becomes durable when the caller commits on the returned
        connection.

    Raises:
        sqlite3.Error: If the table cannot be created or cleaned. The
            connection is closed before the error propagates.
    """
    if namespace is None:
        namespace = PARSER_NAMESPACE

    db = sqlite3.connect(path)
    try:
        db.execute(
            'CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5('
            'note_id, title, body, top_level_title, is_done, is_todo, '
            'parser_namespace, url, tokenize="trigram");'
        )
        db.execute(
            'DELETE FROM note_search WHERE parser_namespace = ?;',
            (namespace,),
        )
    except sqlite3.Error:
        # Do not leak the connection when the schema or cleanup step fails.
        db.close()
        raise
    return db
|
||||||
|
|
||||||
def load_all(top_dir_relative):
|
def load_all(top_dir_relative):
|
||||||
top = os.path.abspath(top_dir_relative)
|
top = os.path.abspath(top_dir_relative)
|
||||||
|
|
||||||
@ -456,10 +464,39 @@ def render_rss(docs, dest_top):
|
|||||||
f.write(result)
|
f.write(result)
|
||||||
|
|
||||||
|
|
||||||
def regen_all(source_top, dest_top, docs=None):
|
def regen_all(source_top, dest_top, docs=None, db=None):
|
||||||
if docs is None:
|
if docs is None:
|
||||||
docs = load_all(source_top)
|
docs = load_all(source_top)
|
||||||
|
|
||||||
|
cur = db.cursor()
|
||||||
|
cleaned_db = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
|
||||||
|
cleaned_db = True
|
||||||
|
except sqlite3.OperationalError as err:
|
||||||
|
if WATCH:
|
||||||
|
logging.warning("Error pre-cleaning DB, search won't be updated")
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Save posts to DB
|
||||||
|
for (doc, front_matter, out_path) in docs.values():
|
||||||
|
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
|
||||||
|
(
|
||||||
|
out_path,
|
||||||
|
front_matter['title'],
|
||||||
|
doc,
|
||||||
|
front_matter['title'],
|
||||||
|
False,
|
||||||
|
False,
|
||||||
|
PARSER_NAMESPACE,
|
||||||
|
out_path + '/index.html',
|
||||||
|
))
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
db.commit()
|
||||||
|
|
||||||
# Render posts
|
# Render posts
|
||||||
for (doc, front_matter, out_path) in docs.values():
|
for (doc, front_matter, out_path) in docs.values():
|
||||||
doc_full_path = os.path.join(dest_top, out_path)
|
doc_full_path = os.path.join(dest_top, out_path)
|
||||||
@ -513,7 +550,8 @@ def main(source_top, dest_top):
|
|||||||
## Initial load
|
## Initial load
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
logging.info("Initial load...")
|
logging.info("Initial load...")
|
||||||
docs = regen_all(source_top, dest_top)
|
db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
|
||||||
|
docs = regen_all(source_top, dest_top, db=db)
|
||||||
logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
|
logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
|
||||||
|
|
||||||
if not WATCH:
|
if not WATCH:
|
||||||
@ -557,7 +595,7 @@ def main(source_top, dest_top):
|
|||||||
if is_static_resource:
|
if is_static_resource:
|
||||||
logging.info("Updated static resources in {:.2f}s".format(time.time() - t0))
|
logging.info("Updated static resources in {:.2f}s".format(time.time() - t0))
|
||||||
else:
|
else:
|
||||||
docs = regen_all(source_top, dest_top, docs)
|
docs = regen_all(source_top, dest_top, docs, db=db)
|
||||||
logging.info("Updated all in {:.2f}s".format(time.time() - t0))
|
logging.info("Updated all in {:.2f}s".format(time.time() - t0))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -46,6 +46,7 @@ IMG_EXTENSIONS = set([
|
|||||||
"gif",
|
"gif",
|
||||||
])
|
])
|
||||||
SKIPPED_TAGS = set(['attach'])
|
SKIPPED_TAGS = set(['attach'])
|
||||||
|
PARSER_NAMESPACE = 'codigoparallevar.com/notes'
|
||||||
|
|
||||||
WATCH = True
|
WATCH = True
|
||||||
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
|
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
|
||||||
@ -88,11 +89,9 @@ def is_git_path(path):
|
|||||||
return any([chunk == ".git" for chunk in path.split(os.sep)])
|
return any([chunk == ".git" for chunk in path.split(os.sep)])
|
||||||
|
|
||||||
def create_db(path, namespace=None):
    """Open the full-text-search database and clear one namespace's rows.

    Connects to the SQLite file at *path*, creates the ``note_search``
    FTS5 virtual table (trigram tokenizer) when it does not exist yet, and
    deletes every row whose ``parser_namespace`` column equals *namespace*.
    An existing database file is kept, and rows that belong to other
    namespaces are left untouched.

    Args:
        path: Filesystem path of the SQLite database file.
        namespace: Value of ``parser_namespace`` whose rows are removed.
            Defaults to this module's ``PARSER_NAMESPACE``.

    Returns:
        The open ``sqlite3.Connection``. The deletion is not committed
        here; it becomes durable when the caller commits on the returned
        connection.

    Raises:
        sqlite3.Error: If the table cannot be created or cleaned. The
            connection is closed before the error propagates.
    """
    if namespace is None:
        namespace = PARSER_NAMESPACE

    db = sqlite3.connect(path)
    try:
        # `url` and `tokenize=...` must be separate, comma-delimited
        # arguments: FTS5 otherwise reads `tokenize=...` as a column option
        # of `url` and rejects the statement on a fresh database.
        db.execute(
            'CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5('
            'note_id, title, body, top_level_title, is_done, is_todo, '
            'parser_namespace, url, tokenize="trigram");'
        )
        db.execute(
            'DELETE FROM note_search WHERE parser_namespace = ?;',
            (namespace,),
        )
    except sqlite3.Error:
        # Do not leak the connection when the schema or cleanup step fails.
        db.close()
        raise
    return db
|
||||||
|
|
||||||
def load_all(top_dir_relative):
|
def load_all(top_dir_relative):
|
||||||
@ -126,7 +125,7 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
|
|||||||
cleaned_db = False
|
cleaned_db = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cur.execute('DELETE FROM note_search;')
|
cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
|
||||||
cleaned_db = True
|
cleaned_db = True
|
||||||
except sqlite3.OperationalError as err:
|
except sqlite3.OperationalError as err:
|
||||||
if WATCH:
|
if WATCH:
|
||||||
@ -262,7 +261,7 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
|
|||||||
topLevelHeadline = topLevelHeadline.parent
|
topLevelHeadline = topLevelHeadline.parent
|
||||||
|
|
||||||
# Save for full-text-search
|
# Save for full-text-search
|
||||||
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?);''',
|
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
|
||||||
(
|
(
|
||||||
headline.id,
|
headline.id,
|
||||||
headline.title.get_text(),
|
headline.title.get_text(),
|
||||||
@ -270,6 +269,8 @@ def regen_all(src_top, dest_top, *, docs=None, db=None):
|
|||||||
topLevelHeadline.title.get_text(),
|
topLevelHeadline.title.get_text(),
|
||||||
headline.is_done,
|
headline.is_done,
|
||||||
headline.is_todo,
|
headline.is_todo,
|
||||||
|
PARSER_NAMESPACE,
|
||||||
|
headline.id + '.node.html',
|
||||||
))
|
))
|
||||||
|
|
||||||
# Update graph, replace document ids with headline ids
|
# Update graph, replace document ids with headline ids
|
||||||
@ -356,7 +357,7 @@ def main(src_top, dest_top):
|
|||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
os.makedirs(dest_top, exist_ok=True)
|
os.makedirs(dest_top, exist_ok=True)
|
||||||
db = create_db(os.path.join(dest_top, 'db.sqlite3'))
|
db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
|
||||||
docs = regen_all(src_top, dest_top, db=db)
|
docs = regen_all(src_top, dest_top, db=db)
|
||||||
|
|
||||||
if not WATCH:
|
if not WATCH:
|
||||||
|
Loading…
Reference in New Issue
Block a user