Compare commits

..

1 Commits

Author SHA1 Message Date
Sergio Martínez Portela
6d621ffc3c WIP: Save blog & notes data on FTS search DB. 2023-10-04 00:19:39 +02:00
5 changed files with 149 additions and 146 deletions

View File

@ -22,6 +22,7 @@ import shutil
import traceback
import time
import re
import sqlite3
from typing import List
from bs4 import BeautifulSoup as bs4
@ -63,6 +64,7 @@ JINJA_ENV = jinja2.Environment(
autoescape=jinja2.select_autoescape()
)
PARSER_NAMESPACE = 'codigoparallevar.com/blog'
WATCH = True
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
WATCH = False
@ -176,6 +178,12 @@ def get_out_path(front_matter):
return out_path
def create_db(path):
    """Open the SQLite search database at ``path`` for this parser.

    Creates the ``note_search`` FTS5 virtual table if it does not exist
    yet, then deletes every row whose ``parser_namespace`` equals
    ``PARSER_NAMESPACE``.

    Returns the open ``sqlite3`` connection. No commit is issued here.
    """
    create_table_sql = 'CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url, tokenize="trigram");'
    purge_namespace_sql = 'DELETE FROM note_search WHERE parser_namespace = ?;'

    connection = sqlite3.connect(path)
    connection.execute(create_table_sql)
    # Only rows in this parser's namespace are removed; other rows stay.
    connection.execute(purge_namespace_sql, (PARSER_NAMESPACE,))
    return connection
def load_all(top_dir_relative):
top = os.path.abspath(top_dir_relative)
@ -456,10 +464,39 @@ def render_rss(docs, dest_top):
f.write(result)
def regen_all(source_top, dest_top, docs=None):
def regen_all(source_top, dest_top, docs=None, db=None):
if docs is None:
docs = load_all(source_top)
cur = db.cursor()
cleaned_db = False
try:
cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
cleaned_db = True
except sqlite3.OperationalError as err:
if WATCH:
logging.warning("Error pre-cleaning DB, search won't be updated")
else:
raise
# Save posts to DB
for (doc, front_matter, out_path) in docs.values():
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
(
out_path,
front_matter['title'],
doc,
front_matter['title'],
False,
False,
PARSER_NAMESPACE,
out_path + '/index.html',
))
cur.close()
db.commit()
# Render posts
for (doc, front_matter, out_path) in docs.values():
doc_full_path = os.path.join(dest_top, out_path)
@ -513,7 +550,8 @@ def main(source_top, dest_top):
## Initial load
t0 = time.time()
logging.info("Initial load...")
docs = regen_all(source_top, dest_top)
db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
docs = regen_all(source_top, dest_top, db=db)
logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
if not WATCH:
@ -557,7 +595,7 @@ def main(source_top, dest_top):
if is_static_resource:
logging.info("Updated static resources in {:.2f}s".format(time.time() - t0))
else:
docs = regen_all(source_top, dest_top, docs)
docs = regen_all(source_top, dest_top, docs, db=db)
logging.info("Updated all in {:.2f}s".format(time.time() - t0))
else:

View File

@ -46,14 +46,14 @@ IMG_EXTENSIONS = set([
"gif",
])
SKIPPED_TAGS = set(['attach'])
DEFAULT_SUBPATH = "public"
PARSER_NAMESPACE = 'codigoparallevar.com/notes'
WATCH = True
if os.getenv('WATCH_AND_REBUILD', '1') == '0':
WATCH = False
MIN_HIDDEN_HEADLINE_LEVEL = 2
INDEX_ID = os.getenv("INDEX_ID", "ea48ec1d-f9d4-4fb7-b39a-faa7b6e2ba95")
INDEX_ID = "ea48ec1d-f9d4-4fb7-b39a-faa7b6e2ba95"
SITE_NAME = "Código para llevar"
MONITORED_EVENT_TYPES = (
@ -89,11 +89,9 @@ def is_git_path(path):
return any([chunk == ".git" for chunk in path.split(os.sep)])
def create_db(path):
if os.path.exists(path):
os.unlink(path)
db = sqlite3.connect(path)
db.execute('CREATE VIRTUAL TABLE note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, tokenize="trigram");')
db.execute('CREATE VIRTUAL TABLE IF NOT EXISTS note_search USING fts5(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url, tokenize="trigram");')
db.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
return db
def load_all(top_dir_relative):
@ -121,13 +119,13 @@ def load_all(top_dir_relative):
logging.info("Collected {} files".format(len(docs)))
return docs
def regen_all(src_top, dest_top, subpath, *, docs=None, db=None):
def regen_all(src_top, dest_top, *, docs=None, db=None):
files_generated = 0
cur = db.cursor()
cleaned_db = False
try:
cur.execute('DELETE FROM note_search;')
cur.execute('DELETE FROM note_search WHERE parser_namespace = ?;', (PARSER_NAMESPACE,))
cleaned_db = True
except sqlite3.OperationalError as err:
if WATCH:
@ -151,7 +149,7 @@ def regen_all(src_top, dest_top, subpath, *, docs=None, db=None):
changed = False
headlines = list(doc.getAllHeadlines())
related = None
if not relpath.startswith(subpath + "/"):
if not relpath.startswith("public/"):
# print("Skip:", relpath)
continue
@ -263,7 +261,7 @@ def regen_all(src_top, dest_top, subpath, *, docs=None, db=None):
topLevelHeadline = topLevelHeadline.parent
# Save for full-text-search
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo) VALUES (?, ?, ?, ?, ?, ?);''',
cur.execute('''INSERT INTO note_search(note_id, title, body, top_level_title, is_done, is_todo, parser_namespace, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);''',
(
headline.id,
headline.title.get_text(),
@ -271,6 +269,8 @@ def regen_all(src_top, dest_top, subpath, *, docs=None, db=None):
topLevelHeadline.title.get_text(),
headline.is_done,
headline.is_todo,
PARSER_NAMESPACE,
headline.id + '.node.html',
))
# Update graph, replace document ids with headline ids
@ -350,15 +350,15 @@ def regen_all(src_top, dest_top, subpath, *, docs=None, db=None):
dirs_exist_ok=True)
def main(src_top, dest_top, subpath):
def main(src_top, dest_top):
notifier = inotify.adapters.InotifyTrees([src_top, STATIC_PATH])
## Initial load
t0 = time.time()
os.makedirs(dest_top, exist_ok=True)
db = create_db(os.path.join(dest_top, 'db.sqlite3'))
docs = regen_all(src_top, dest_top, subpath=subpath, db=db)
db = create_db(os.path.join(dest_top, '..', 'db.sqlite3'))
docs = regen_all(src_top, dest_top, db=db)
if not WATCH:
logging.info("Build completed in {:.2f}s".format(time.time() - t0))
@ -376,7 +376,7 @@ def main(src_top, dest_top, subpath):
print("CHANGED: {}".format(filepath))
t0 = time.time()
try:
docs = regen_all(src_top, dest_top, subpath=subpath, docs=docs, db=db)
docs = regen_all(src_top, dest_top, docs=docs, db=db)
except:
logging.error(traceback.format_exc())
logging.error("Loading new templates failed 😿")
@ -826,13 +826,9 @@ def save_changes(doc):
if __name__ == "__main__":
if len(sys.argv) not in (3, 4):
print("Usage: {} SOURCE_TOP DEST_TOP <SUBPATH>".format(sys.argv[0]))
if len(sys.argv) != 3:
print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]))
exit(0)
logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
subpath = DEFAULT_SUBPATH
if len(sys.argv) == 4:
subpath = sys.argv[3]
exit(main(sys.argv[1], sys.argv[2], subpath=subpath))
exit(main(sys.argv[1], sys.argv[2]))

View File

@ -1,5 +1,3 @@
/* Dark mode. */
@media (prefers-color-scheme: dark) {
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
@ -82,4 +80,3 @@
.vi { color: #f8f8f2 } /* Name.Variable.Instance */
.vm { color: #f8f8f2 } /* Name.Variable.Magic */
.il { color: #ae81ff } /* Literal.Number.Integer.Long */
}

View File

@ -6,13 +6,11 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body {
background-color: white;
font-family: sans-serif;
margin: 0 auto;
width: fit-content;
max-width: 100ex;
padding: 0 1ex;
color: black;
}
.header h1 {
text-align: center;
@ -47,7 +45,7 @@
border-right: 1px solid #000;
}
@media (prefers-color-scheme: dark) {
body {
html {
background-color: #1d1f21;
color: #fafafe;
}
@ -74,12 +72,6 @@
</div>
<div class="links">
<a href="/notes">
<section>
<h2>Notes</h2>
<p>Some publicly-visible notes from a sort of knowledge graph that I use as information dump.</p>
</section>
</a>
<section>
<h2><a href="/blog">Blog</a></h2>
<p>
@ -108,24 +100,12 @@
</ul>
</p>
</section>
<a href="/notes">
<section>
<h2>Talks / Slides</h2>
<p>
<ul>
<li>
Malleable Software
(<a href="/slides/hackliza2024/software-maleable/software-maleable.odp">galician, </a>
for <a href="https://hackliza.gal">Hackliza</a>
<a href="/slides/hackliza2024/software-maleable/software-maleable.pdf">[PDF]</a>
<a href="/slides/hackliza2024/software-maleable/software-maleable.odp">[ODP]</a>)
(<a href="/slides/eslibre2024/software-maleable.odp">spanish,</a>
for <a href="https://eslib.re/2024/">esLibre 2024</a>
<a href="/slides/eslibre2024/software-maleable.pdf">[PDF]</a>
<a href="/slides/eslibre2024/software-maleable.odp">[ODP]</a>).
</li>
</ul>
</p>
<h2>Notes</h2>
<p>Some publicly-visible notes from a sort of knowledge graph that I use as information dump.</p>
</section>
</a>
<!-- section>
<h2>Projects</h2>
<p>
@ -136,7 +116,7 @@
<section id="social">
<h2>Find me</h2>
<p>
<a href="https://social.codigoparallevar.com/@kenkeiras">ActivityPub</a>
<a href="https://social.codigoparallevar.com/@kenkeiras">Mastodon</a>
<a href="https://github.com/kenkeiras">GitHub</a>
<a href="https://gitlab.com/kenkeiras">GitLab</a>
<a href="https://programaker.com/users/kenkeiras">PrograMaker</a>

View File

@ -10,8 +10,6 @@ body {
max-width: 80ex;
margin: 0 auto;
padding: 0.5ex 1ex;
background-color: white;
color: black;
}
body.blog {
@ -334,11 +332,6 @@ h1.title .state.todo-True {
h1.title .state.todo-False {
background-color: rgba(0,255,0,0.25);
}
h1.title .state.todo-True.state-SOMETIME {
background-color: #ddd;
color: black;
}
h1.title .tags {
float: right;
@ -375,7 +368,6 @@ a.internal::after {
}
a.external::after {
content: ' ↗';
vertical-align: top;
}
/* Markup */
@ -588,7 +580,7 @@ tr.__table-separator {
/* Dark mode. */
@media (prefers-color-scheme: dark) {
html, body {
html {
background-color: #1d1f21;
color: #fafafe;
}