Full-text-search with a Quick&Dirty SQLite DB.

This commit is contained in:
Sergio Martínez Portela 2022-09-30 00:13:22 +02:00
parent 8d136312b7
commit bc3bf30669

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sqlite3
import time import time
import json import json
import html import html
@ -45,6 +46,14 @@ STATIC_PATH = os.path.join(ROOT_DIR, 'static')
def is_git_path(path):
    """Return True when any component of ``path`` is exactly ``.git``.

    The path is split on ``os.sep`` and each component is compared for
    equality, so names that merely contain ``.git`` (such as ``.github``)
    do not match.
    """
    # A membership test avoids building a throwaway list of booleans.
    return ".git" in path.split(os.sep)
def create_db(path):
    """Create a fresh SQLite database holding the full-text-search table.

    Any file already present at ``path`` is removed first, so the returned
    database always starts out empty.

    Args:
        path: Filesystem location of the SQLite database file.

    Returns:
        An open ``sqlite3.Connection`` containing an empty FTS5 virtual
        table ``note_search`` with columns ``note_id``, ``title`` and
        ``body``.

    Raises:
        sqlite3.Error: If the database cannot be opened or the virtual
            table cannot be created.
        OSError: If an existing file at ``path`` cannot be removed.
    """
    # Remove directly instead of checking first: no window between the
    # existence check and the unlink, and a missing file is simply fine.
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass

    db = sqlite3.connect(path)
    try:
        db.execute('CREATE VIRTUAL TABLE note_search USING fts5(note_id, title, body);')
    except Exception:
        # Do not leak the connection when table creation fails.
        db.close()
        raise
    return db
def load_all(top_dir_relative): def load_all(top_dir_relative):
top = os.path.abspath(top_dir_relative) top = os.path.abspath(top_dir_relative)
@ -70,8 +79,10 @@ def load_all(top_dir_relative):
logging.info("Collected {} files".format(len(docs))) logging.info("Collected {} files".format(len(docs)))
return docs return docs
def regen_all(src_top, dest_top, docs=None): def regen_all(src_top, dest_top, *, docs=None, db=None):
files_generated = 0 files_generated = 0
cur = db.cursor()
cur.execute('DELETE FROM note_search;')
docs = load_all(src_top) docs = load_all(src_top)
doc_to_headline_remapping = {} doc_to_headline_remapping = {}
@ -176,6 +187,15 @@ def regen_all(src_top, dest_top, docs=None):
"depth": headline.depth, "depth": headline.depth,
} }
# Save for full-text-search
cur.execute('''INSERT INTO note_search(note_id, title, body) VALUES (?, ?, ?);''',
(
headline.id,
headline.title.get_text(),
''.join(headline.get_contents('raw')),
))
# Render HTML
with open(endpath, "wt") as f: with open(endpath, "wt") as f:
f.write(as_document(render(headline, doc, headlineLevel=0), f.write(as_document(render(headline, doc, headlineLevel=0),
org_rw.token_list_to_plaintext(headline.title.contents))) org_rw.token_list_to_plaintext(headline.title.contents)))
@ -206,7 +226,8 @@ def regen_all(src_top, dest_top, docs=None):
f.write(source.replace('<!-- REPLACE_THIS_WITH_GRAPH -->', f.write(source.replace('<!-- REPLACE_THIS_WITH_GRAPH -->',
json.dumps(graph))) json.dumps(graph)))
logging.info("Generated {} files".format(files_generated)) logging.info("Generated {} files".format(files_generated))
cur.close()
db.commit()
def main(src_top, dest_top): def main(src_top, dest_top):
notifier = inotify.adapters.InotifyTrees([src_top, STATIC_PATH]) notifier = inotify.adapters.InotifyTrees([src_top, STATIC_PATH])
@ -214,7 +235,8 @@ def main(src_top, dest_top):
## Initial load ## Initial load
t0 = time.time() t0 = time.time()
docs = regen_all(src_top, dest_top) db = create_db(os.path.join(dest_top, 'db.sqlite3'))
docs = regen_all(src_top, dest_top, db=db)
logging.info("Initial load completed in {:.2f}s".format(time.time() - t0)) logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
## Updating ## Updating
@ -228,7 +250,7 @@ def main(src_top, dest_top):
print("CHANGED: {}".format(filepath)) print("CHANGED: {}".format(filepath))
t0 = time.time() t0 = time.time()
try: try:
docs = regen_all(src_top, dest_top, docs) docs = regen_all(src_top, dest_top, docs=docs, db=db)
except: except:
logging.error(traceback.format_exc()) logging.error(traceback.format_exc())
logging.error("Loading new templates failed 😿") logging.error("Loading new templates failed 😿")