new-codigoparallevar/scripts/blog.py

234 lines
7.2 KiB
Python
Raw Normal View History

2022-06-27 18:39:21 +00:00
#!/usr/bin/env python3
# Suffix that identifies a Markdown article source.
MARKDOWN_EXTENSION = '.md'

# Suffixes of the files picked up when walking the source tree.
EXTENSIONS = [MARKDOWN_EXTENSION]

# Extensions handed to `markdown.markdown()` when rendering an article.
# See more in: https://python-markdown.github.io/extensions/
MARKDOWN_EXTRA_FEATURES = [
    'markdown.extensions.fenced_code',
    'markdown.extensions.codehilite',
    'markdown.extensions.extra',
]
import json
import logging
import sys
import os
import datetime
import shutil
import traceback
import time
import re
2022-06-27 18:39:21 +00:00
import jinja2
import inotify.adapters
2022-06-27 18:39:21 +00:00
import yaml
import markdown
from unidecode import unidecode
# `DD.MM.YYYY, H:MM` dates. Groups: day, month, year, hour, minute.
NIKOLA_DATE_RE = re.compile(
    r'^([0-2]\d|30|31)\.(0\d|1[012])\.(\d{4}), (\d{1,2}):(\d{2})$'
)

# `YYYY-MM-DD HH:MM:SS` dates with an optional, space-separated suffix.
# Groups: year, month, day, hour, minute, `:SS`, and the optional suffix.
COMPLETE_DATE_RE = re.compile(
    r'^(\d{4})-(0\d|1[012])-([0-2]\d|30|31) '
    r'(\d{2}):(\d{2})(:\d{2})( .+)?$'
)

# Runs of whitespace and/or hyphens; slugify() turns each run into one '-'.
SLUG_HYPHENATE_RE = re.compile(r'[\s\-]+')

# Anything that is not whitespace, a hyphen or an ASCII alphanumeric;
# slugify() removes these characters.
SLUG_REMOVE_RE = re.compile(r'[^\s\-a-zA-Z0-9]*')
# Parent of the directory that contains this script.
ROOT_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)

# Directory holding the templates and the static assets.
STATIC_PATH = os.path.join(ROOT_DIR, 'static')

# Template (looked up inside STATIC_PATH) used to render every article.
ARTICLE_TEMPLATE_NAME = 'article.tmpl.html'

# (source relative to STATIC_PATH, destination relative to the output dir)
STATIC_RESOURCES = (
    ('style.css', 'css/style.css'),
)
# Jinja environment that resolves template names against STATIC_PATH.
JINJA_ENV = jinja2.Environment(
    autoescape=jinja2.select_autoescape(),
    loader=jinja2.FileSystemLoader(STATIC_PATH),
)
def update_statics():
    """
    (Re)load the article template into the module-level ARTICLE_TEMPLATE.
    """
    global ARTICLE_TEMPLATE
    template = JINJA_ENV.get_template(ARTICLE_TEMPLATE_NAME)
    ARTICLE_TEMPLATE = template
# Load the article template at import time so that ARTICLE_TEMPLATE is
# defined before any article is rendered.
update_statics()
# inotify event type names that main() reacts to; events carrying none of
# these names are skipped.
MONITORED_EVENT_TYPES = (
    'IN_CREATE',
    # 'IN_MODIFY',
    'IN_CLOSE_WRITE',
    'IN_DELETE',
    'IN_MOVED_FROM',
    'IN_MOVED_TO',
    'IN_DELETE_SELF',
    'IN_MOVE_SELF',
)
2022-06-27 18:39:21 +00:00
def parse_nikola_date(match):
    """
    Build a naive datetime from a NIKOLA_DATE_RE match.

    The match groups are, in order: day, month, year, hour and minute.
    """
    day, month, year, hour, minute = (
        int(part) for part in match.group(1, 2, 3, 4, 5)
    )
    return datetime.datetime(year, month, day, hour, minute)
2022-06-27 18:39:21 +00:00
def parse_complete_date(match):
    """
    Build a datetime from a COMPLETE_DATE_RE match.

    The matched text is `YYYY-MM-DD HH:MM:SS`, optionally followed by a
    space and a timezone suffix. The regular expression accepts dates with
    no suffix at all, so several layouts are tried in turn:

    - zone name plus offset (e.g. `UTC+02:00`), the layout originally supported
    - offset only (e.g. `+0200`)
    - zone name only (e.g. `UTC`)
    - no suffix

    Returns an aware datetime when an offset is present, a naive one
    otherwise.

    Raises:
        ValueError: if the text fits none of the layouts.
    """
    text = match.group(0)
    base_format = '%Y-%m-%d %H:%M:%S'
    # The original layout goes first so previously accepted inputs parse
    # exactly as before.
    suffixes = (' %Z%z', ' %z', ' %Z', '')

    last_error = None
    for suffix in suffixes:
        try:
            return datetime.datetime.strptime(text, base_format + suffix)
        except ValueError as err:
            last_error = err
    raise last_error
2022-06-27 18:39:21 +00:00
def slugify(title):
    """
    Made for compatibility with Nikola's slugify within CodigoParaLlevar blog.

    The title is passed through `unidecode` and lowercased, every character
    that is not whitespace, a hyphen or an ASCII alphanumeric is removed,
    surrounding whitespace is trimmed, and finally each run of whitespace
    and/or hyphens is collapsed into a single hyphen.
    """
    slug = unidecode(title).lower()
    slug = SLUG_REMOVE_RE.sub('', slug)
    # Trim *before* hyphenating: once whitespace has been turned into
    # hyphens a strip() has nothing left to remove, and leading/trailing
    # whitespace (e.g. left behind by removed punctuation) would end up as
    # leading/trailing hyphens in the slug.
    slug = slug.strip()
    return SLUG_HYPHENATE_RE.sub('-', slug)
2022-06-27 18:39:21 +00:00
def read_markdown(path):
    """
    Read a Markdown article and split it into rendered body and front matter.

    The file must start with a `---` line, followed by a YAML front matter
    that ends at the next `---` followed by a newline. Everything after that
    is rendered with `markdown.markdown()` using MARKDOWN_EXTRA_FEATURES.

    Returns:
        A `(doc, front_matter)` tuple: the rendered HTML string and the
        parsed front matter (an empty dict when the front matter is empty).

    Raises:
        Exception: if the front matter is missing or not terminated.
    """
    # Explicit encoding so the result does not depend on the locale.
    with open(path, 'rt', encoding='utf-8') as f:
        data = f.read()

    if not data.startswith('---'):
        raise Exception('Front matter is needed for proper rendering. Not found on: {}'.format(
            path
        ))

    # Skip the opening delimiter line. find() is used instead of index() so
    # a file with no newline at all is reported as an unfinished front
    # matter rather than as a bare "substring not found" ValueError.
    start = data.find('\n')
    if start == -1 or '---\n' not in data[start:]:
        raise Exception('Front matter not finished on: {}'.format(path))

    front_matter_str, content = data[start:].split('---\n', 1)
    # An empty front matter loads as None; normalize it to an empty mapping
    # so callers can always use `in` / item access on the result.
    front_matter = yaml.load(front_matter_str, Loader=yaml.SafeLoader) or {}

    doc = markdown.markdown(content, extensions=MARKDOWN_EXTRA_FEATURES)
    return doc, front_matter
def get_out_path(front_matter, path=None):
    """
    Compute the output path (relative, without extension) of an article.

    The result is `<year>/<slug>`. As a side effect, a textual
    `front_matter['date']` is replaced by the parsed datetime, and
    `front_matter['slug']` is derived from the title when it is missing.

    `path` is optional and is only used to name the offending file in
    error messages.

    Raises:
        Exception: if there is no date, or neither a slug nor a title.
        NotImplementedError: if the date is in an unrecognized format.
    """
    # The error messages used to reference an undefined `path`, which
    # raised NameError instead of the intended exception.
    origin = path if path is not None else '<unknown file>'

    if 'date' not in front_matter:
        raise Exception('No date found on: {}'.format(
            origin
        ))

    date = front_matter['date']
    if isinstance(date, datetime.date):
        # Already a date/datetime object (a YAML loader may parse an
        # unquoted timestamp by itself); it has `.year`, so keep it as is.
        pass
    elif not isinstance(date, str):
        raise NotImplementedError('Unknown date format: {}'.format(date))
    elif m := NIKOLA_DATE_RE.match(date):
        front_matter['date'] = parse_nikola_date(m)
    elif m := COMPLETE_DATE_RE.match(date):
        front_matter['date'] = parse_complete_date(m)
    else:
        raise NotImplementedError('Unknown date format: {}'.format(
            date))

    if 'slug' not in front_matter:
        if 'title' not in front_matter:
            raise Exception('No title found on: {}'.format(
                origin
            ))
        front_matter['slug'] = slugify(front_matter['title'])

    out_path = os.path.join(str(front_matter['date'].year), front_matter['slug'])
    return out_path
def load_all(top_dir_relative):
    """
    Load every article found under `top_dir_relative`.

    Files whose name does not end in one of EXTENSIONS are ignored.

    Returns:
        A dict mapping each article's absolute path to a
        `(doc, front_matter, out_path)` tuple.

    Raises:
        NotImplementedError: for a file listed in EXTENSIONS that is not
            Markdown.
    """
    base_dir = os.path.abspath(top_dir_relative)
    loaded = {}

    for current_dir, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            # Not one of the known article extensions: not ours to handle.
            if not filename.endswith(tuple(EXTENSIONS)):
                continue

            if not filename.endswith(MARKDOWN_EXTENSION):
                raise NotImplementedError('Unknown filetype: {}'.format(filename))

            full_path = os.path.join(current_dir, filename)
            body, front_matter = read_markdown(full_path)
            target = get_out_path(front_matter)
            loaded[full_path] = (body, front_matter, target)

    return loaded
def load_doc(filepath):
    """
    Load a single article.

    Returns the same `(doc, front_matter, out_path)` tuple that load_all()
    stores for each file.
    """
    rendered, front_matter = read_markdown(filepath)
    return (rendered, front_matter, get_out_path(front_matter))
def render_article(doc, front_matter, f):
    """
    Render ARTICLE_TEMPLATE for one article and write the result to `f`.

    The template receives the rendered body as `content` and the front
    matter's title as `title`.
    """
    f.write(ARTICLE_TEMPLATE.render(title=front_matter['title'], content=doc))
def regen_all(source_top, dest_top, docs=None):
    """
    Render every article into `dest_top` and copy the static resources.

    When `docs` is None the articles are loaded from `source_top` with
    load_all(); otherwise the given mapping is rendered as is. Each article
    is written to `<dest_top>/<out_path>.html`, and every STATIC_RESOURCES
    entry is copied from STATIC_PATH into `dest_top`.

    Returns:
        The mapping of documents that was rendered.
    """
    if docs is None:
        docs = load_all(source_top)

    for (doc, front_matter, out_path) in docs.values():
        doc_full_path = os.path.join(dest_top, out_path)
        os.makedirs(os.path.dirname(doc_full_path), exist_ok=True)
        # Explicit encoding so the generated HTML does not depend on the
        # locale of the machine running the script.
        with open(doc_full_path + '.html', 'wt', encoding='utf-8') as f:
            render_article(doc, front_matter, f)

    for src, dest in STATIC_RESOURCES:
        target_dest = os.path.join(dest_top, dest)
        os.makedirs(os.path.dirname(target_dest), exist_ok=True)
        shutil.copy(os.path.join(STATIC_PATH, src), target_dest)

    return docs
def main(source_top, dest_top):
    """
    Generate the whole site once, then keep it updated as files change.

    Watches `source_top` and STATIC_PATH with inotify. A change under
    STATIC_PATH reloads the template and regenerates everything; a change
    to an article re-renders just that article. This function only returns
    if the event generator ends.
    """
    # load_all() keys documents by absolute path; watch the absolute path
    # too so that keys built from inotify events match them instead of
    # creating duplicate entries when `source_top` is relative.
    source_top = os.path.abspath(source_top)
    notifier = inotify.adapters.InotifyTrees([source_top, STATIC_PATH])

    ## Initial load
    t0 = time.time()
    logging.info("Initial load...")
    docs = regen_all(source_top, dest_top)
    logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))

    ## Updating
    for event in notifier.event_gen(yield_nones=False):
        (_header, types, directory, filename) = event
        if not any(name in MONITORED_EVENT_TYPES for name in types):
            continue

        filepath = os.path.join(directory, filename)

        if filepath.startswith(STATIC_PATH):
            t0 = time.time()
            update_statics()
            # regen_all() also copies STATIC_RESOURCES, so no separate copy
            # is needed here.
            docs = regen_all(source_top, dest_top, docs)
            logging.info("Updated all in {:.2f}s".format(time.time() - t0))
            continue

        # Same filter as load_all(): ignore anything that is not an article
        # (editor swap files, directory events with an empty filename...).
        if not filepath.endswith(tuple(EXTENSIONS)):
            continue

        try:
            (doc, front_matter, out_path) = load_doc(filepath)
        except Exception:
            # Best effort: a broken (or just deleted) article must not stop
            # the watcher. KeyboardInterrupt/SystemExit still propagate.
            logging.error(traceback.format_exc())
            logging.error("Skipping update 😿")
            continue

        t0 = time.time()
        docs[filepath] = (doc, front_matter, out_path)
        doc_full_path = os.path.join(dest_top, out_path)
        os.makedirs(os.path.dirname(doc_full_path), exist_ok=True)
        with open(doc_full_path + '.html', 'wt', encoding='utf-8') as f:
            render_article(doc, front_matter, f)
        logging.info("Updated all in {:.2f}s".format(time.time() - t0))
2022-06-27 18:39:21 +00:00
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Wrong invocation is an error: report it on stderr with a non-zero
        # status. sys.exit() is used because the bare `exit` helper is
        # injected by the `site` module and is not always available.
        print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    main(sys.argv[1], sys.argv[2])