new-codigoparallevar/scripts/blog.py

#!/usr/bin/env python3

# File extension that identifies Markdown source documents.
MARKDOWN_EXTENSION = '.md'
# Extensions that load_all() accepts while walking the source tree; files whose
# name ends with none of them are skipped.
EXTENSIONS = [
    MARKDOWN_EXTENSION,
]

# Python-Markdown extensions handed to markdown.markdown() in read_markdown().
MARKDOWN_EXTRA_FEATURES = [
    # See more in: https://python-markdown.github.io/extensions/
    'markdown.extensions.fenced_code',
    'markdown.extensions.codehilite',
    'markdown.extensions.extra',
]

import datetime
import io
import json
import logging
import os
import re
import shutil
import sys
import time
import traceback
from typing import List

import inotify.adapters
import jinja2
import markdown
import yaml
from unidecode import unidecode

# Date written as "DD.MM.YYYY, H:MM" (the hour may have one or two digits).
# Groups: 1=day, 2=month, 3=year, 4=hour, 5=minute.
NIKOLA_DATE_RE = re.compile(r'^([0-2]\d|30|31)\.(0\d|1[012])\.(\d{4}), (\d{1,2}):(\d{2})$')

# Date written as "YYYY-MM-DD HH:MM:SS", optionally followed by a space and
# free-form trailing text.
# Groups: 1=year, 2=month, 3=day, 4=hour, 5=minute, 6=":SS", 7=" <trailing text>".
COMPLETE_DATE_RE = re.compile(r'^(\d{4})-(0\d|1[012])-([0-2]\d|30|31) '
                            + r'(\d{2}):(\d{2})(:\d{2})( .+)?$')
# Runs of whitespace and/or hyphens; slugify() replaces each run with one '-'.
SLUG_HYPHENATE_RE = re.compile(r'[\s\-]+')
# Characters other than whitespace, hyphens and ASCII letters/digits; slugify()
# deletes them.
SLUG_REMOVE_RE = re.compile(r'[^\s\-a-zA-Z0-9]*')

# Directory one level above the directory that contains this script.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Directory holding the article template and the static resources listed below.
STATIC_PATH = os.path.join(ROOT_DIR, 'static')
# Name of the template (looked up through JINJA_ENV) used to render articles.
ARTICLE_TEMPLATE_NAME = 'article.tmpl.html'
# Files copied into the output tree. Each entry is
#   (source name under STATIC_PATH, destination relative to the output top)
# optionally followed by a (before, after) pair of strings that are written
# around the file's content.
STATIC_RESOURCES = (
    ('style.css', 'css/style.css'),
    ('light-syntax.css', 'css/light-syntax.css'),
    ('dark-syntax.css', 'css/dark-syntax.css', ('@media (prefers-color-scheme: dark) {\n', '\n}')),
)

# Jinja environment that loads templates from STATIC_PATH; the autoescape
# policy is whatever jinja2.select_autoescape() returns with its defaults.
JINJA_ENV = jinja2.Environment(
    loader=jinja2.FileSystemLoader(STATIC_PATH),
    autoescape=jinja2.select_autoescape()
)

def update_statics():
    """Fetch the article template and publish it as ``ARTICLE_TEMPLATE``.

    The template is looked up by ``ARTICLE_TEMPLATE_NAME`` through
    ``JINJA_ENV``; any error raised by Jinja propagates to the caller.
    """
    global ARTICLE_TEMPLATE
    template = JINJA_ENV.get_template(ARTICLE_TEMPLATE_NAME)
    ARTICLE_TEMPLATE = template


# Make the template available as soon as the module is loaded.
update_statics()

# Names of the inotify event types that main() reacts to; an event carrying
# none of these types is ignored.
MONITORED_EVENT_TYPES = (
    'IN_CREATE',
    # 'IN_MODIFY',
    'IN_CLOSE_WRITE',
    'IN_DELETE',
    'IN_MOVED_FROM',
    'IN_MOVED_TO',
    'IN_DELETE_SELF',
    'IN_MOVE_SELF',
)


def parse_nikola_date(match):
    """Build a naive ``datetime`` from a "DD.MM.YYYY, H:MM" regex match.

    ``match`` must expose groups 1..5 as day, month, year, hour and minute
    (the layout produced by ``NIKOLA_DATE_RE``).
    """
    day, month, year, hour, minute = (int(match.group(idx)) for idx in range(1, 6))
    return datetime.datetime(year, month, day, hour, minute)


def parse_complete_date(match):
    """Turn a ``COMPLETE_DATE_RE`` match into a ``datetime``.

    The pattern makes the trailing timezone part (group 7) optional, so both
    shapes are handled:

    * ``YYYY-MM-DD HH:MM:SS``              -> naive datetime
    * ``YYYY-MM-DD HH:MM:SS <name><offset>`` (e.g. ``UTC+0100``)
                                           -> timezone-aware datetime

    Raises:
        ValueError: if a timezone suffix is present but is not understood by
            ``strptime``'s ``%Z%z`` directives.
    """
    text = match.group(0)
    if match.group(7) is None:
        # No timezone suffix: the '%Z%z' format would reject this input.
        return datetime.datetime.strptime(text, '%Y-%m-%d %H:%M:%S')
    return datetime.datetime.strptime(text, '%Y-%m-%d %H:%M:%S %Z%z')

def split_tags(tags: str) -> List[str]:
    """Normalise a tag specification into a list of tags.

    A string is split on commas and each piece is stripped of surrounding
    whitespace; a list is returned untouched (same object).

    Raises:
        NotImplementedError: for any other input type.
    """
    if isinstance(tags, str):
        return list(map(str.strip, tags.split(',')))
    if isinstance(tags, list):
        return tags
    raise NotImplementedError("Unknown tag type: {}".format(type(tags)))

def slugify(title):
    """
    Made for compatibility with Nikola's slugify within CodigoParaLlevar blog.

    Steps: transliterate to ASCII, drop every character matched by
    SLUG_REMOVE_RE, trim surrounding whitespace, lowercase, and finally
    replace each run matched by SLUG_HYPHENATE_RE with a single '-'.
    """
    slug = unidecode(title)
    # Trim *before* hyphenating: once whitespace has been turned into '-',
    # strip() can no longer remove it and the slug would keep a stray
    # leading/trailing hyphen (e.g. for a title ending in " :)").
    slug = SLUG_REMOVE_RE.sub('', slug).strip().lower()
    return SLUG_HYPHENATE_RE.sub('-', slug)


def read_markdown(path):
    """Read a Markdown document with YAML front matter.

    The file must start with a ``---`` line; everything up to the next
    ``---\\n`` is parsed as YAML and the remainder is rendered to HTML with
    the extensions in MARKDOWN_EXTRA_FEATURES.

    Args:
        path: location of the Markdown file (read as UTF-8).

    Returns:
        ``(html, front_matter)`` where ``front_matter`` is a dict.

    Raises:
        Exception: if the front matter is missing, unterminated, or does not
            parse to a mapping.
    """
    # Explicit encoding so the result does not depend on the process locale.
    with open(path, 'rt', encoding='utf-8') as f:
        data = f.read()

    if not data.startswith('---'):
        raise Exception('Front matter is needed for proper rendering. Not found on: {}'.format(
            path
        ))

    # Skip the rest of the opening delimiter line. A file holding only the
    # opening delimiter has no newline at all and is reported as unfinished.
    start = data.find('\n')
    rest = data[start:] if start != -1 else ''
    if '---\n' not in rest:
        raise Exception('Front matter not finished on: {}'.format(path))

    front_matter_str, content = rest.split('---\n', 1)
    # SafeLoader only builds plain Python objects.
    front_matter = yaml.load(front_matter_str, Loader=yaml.SafeLoader)
    if not isinstance(front_matter, dict):
        # Empty front matter parses to None; fail here with a clear message
        # instead of with an obscure error further down the pipeline.
        raise Exception('Front matter is not a mapping on: {}'.format(path))

    doc = markdown.markdown(content, extensions=MARKDOWN_EXTRA_FEATURES)
    return doc, front_matter


def get_out_path(front_matter, path=None):
    """Compute the output path (``<year>/<slug>``) for a document.

    Side effects on ``front_matter``: ``date`` is replaced by a ``datetime``
    and ``slug`` is filled in from ``title`` when it is missing.

    Args:
        front_matter: dict with the document metadata.
        path: optional source location, only used to make error messages
            more helpful.

    Returns:
        Relative path, without extension, where the document is written.

    Raises:
        Exception: if ``date`` is missing, or both ``slug`` and ``title`` are.
        NotImplementedError: if ``date`` is in an unrecognised format.
    """
    def origin():
        # Best available description of the offending document.
        if path is not None:
            return path
        return 'front matter {!r}'.format(front_matter)

    if 'date' not in front_matter:
        raise Exception('No date found on: {}'.format(origin()))

    date = front_matter['date']
    if isinstance(date, datetime.datetime):
        # The YAML loader turns unquoted ISO timestamps into datetimes itself.
        pass
    elif isinstance(date, datetime.date):
        # Date-only value: normalise to a datetime at midnight.
        date = datetime.datetime.combine(date, datetime.time())
    elif not isinstance(date, str):
        raise NotImplementedError('Unknown date format: {}'.format(date))
    elif m := NIKOLA_DATE_RE.match(date):
        date = parse_nikola_date(m)
    elif m := COMPLETE_DATE_RE.match(date):
        date = parse_complete_date(m)
    else:
        raise NotImplementedError('Unknown date format: {}'.format(date))
    front_matter['date'] = date

    if 'slug' not in front_matter:
        if 'title' not in front_matter:
            raise Exception('No title found on: {}'.format(origin()))

        front_matter['slug'] = slugify(front_matter['title'])

    return os.path.join(str(date.year), front_matter['slug'])


def load_all(top_dir_relative):
    """Load every supported document found under ``top_dir_relative``.

    Returns:
        Dict mapping each document's absolute path to a
        ``(html, front_matter, out_path)`` tuple.

    Raises:
        NotImplementedError: for a file whose extension is listed in
            EXTENSIONS but has no loader.
    """
    base_dir = os.path.abspath(top_dir_relative)
    loaded = {}

    for current_dir, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            # Ignore anything that is not a known document type.
            if not any(filename.endswith(ext) for ext in EXTENSIONS):
                continue

            if not filename.endswith(MARKDOWN_EXTENSION):
                raise NotImplementedError('Unknown filetype: {}'.format(filename))

            full_path = os.path.join(current_dir, filename)
            html, meta = read_markdown(full_path)
            loaded[full_path] = (html, meta, get_out_path(meta))

    return loaded


def load_doc(filepath):
    """Load a single document.

    Returns:
        ``(html, front_matter, out_path)`` for the file at ``filepath``.
    """
    html, meta = read_markdown(filepath)
    return (html, meta, get_out_path(meta))


def render_article(doc, front_matter, f):
    """Render one article with ``ARTICLE_TEMPLATE`` and write it to ``f``.

    Args:
        doc: article body, passed to the template as ``content``.
        front_matter: metadata dict; ``title``, ``date`` and ``tags`` are read.
        f: writable text file-like object receiving the rendered page.
    """
    context = {
        'content': doc,
        'title': front_matter['title'],
        'post_publication_date': front_matter['date'],
        'post_tags': split_tags(front_matter['tags']),
    }
    page = ARTICLE_TEMPLATE.render(**context)
    f.write(page)

def regen_all(source_top, dest_top, docs=None):
    """Render every document and copy the static resources into ``dest_top``.

    Args:
        source_top: root of the source tree; only read when ``docs`` is None.
        dest_top: root of the output tree.
        docs: optional mapping of already loaded documents, as returned by
            load_all(). When omitted, the documents are loaded from
            ``source_top``.

    Returns:
        The mapping of documents that was rendered.

    A document whose rendering fails is logged and skipped; the remaining
    documents are still processed.
    """
    if docs is None:
        docs = load_all(source_top)

    for (doc, front_matter, out_path) in docs.values():
        doc_full_path = os.path.join(dest_top, out_path)

        # Render into memory first: opening the destination before rendering
        # would truncate a previously generated page even when rendering fails.
        rendered = io.StringIO()
        try:
            render_article(doc, front_matter, rendered)
        except Exception:
            logging.error(traceback.format_exc())
            logging.error("Rendering failed 😿")
            continue

        os.makedirs(os.path.dirname(doc_full_path), exist_ok=True)
        with open(doc_full_path + '.html', 'wt', encoding='utf-8') as f:
            f.write(rendered.getvalue())

    for static in STATIC_RESOURCES:
        # Entries are (source, destination) with an optional third element
        # holding the (before, after) text to wrap around the content.
        src_path, dest_path, *wrapping = static
        before, after = wrapping[0] if wrapping else ('', '')

        target_dest = os.path.join(dest_top, dest_path)
        os.makedirs(os.path.dirname(target_dest), exist_ok=True)
        with open(os.path.join(STATIC_PATH, src_path), 'rt', encoding='utf-8') as src:
            data = before + src.read() + after

        with open(target_dest, 'wt', encoding='utf-8') as f:
            f.write(data)

    return docs


def _write_static_resources(dest_top):
    """Copy every STATIC_RESOURCES entry into ``dest_top``, applying its wrapper."""
    for static in STATIC_RESOURCES:
        src_path, dest_path, *wrapping = static
        before, after = wrapping[0] if wrapping else ('', '')

        target_dest = os.path.join(dest_top, dest_path)
        os.makedirs(os.path.dirname(target_dest), exist_ok=True)
        with open(os.path.join(STATIC_PATH, src_path), 'rt', encoding='utf-8') as src:
            data = before + src.read() + after

        with open(target_dest, 'wt', encoding='utf-8') as f:
            f.write(data)


def _write_article(doc, front_matter, doc_full_path):
    """Render one article to ``doc_full_path + '.html'``; return True on success."""
    # Render into memory first so that a failing render never truncates the
    # page generated by a previous, successful run.
    rendered = io.StringIO()
    try:
        render_article(doc, front_matter, rendered)
    except Exception:
        logging.error(traceback.format_exc())
        logging.error("Rendering failed 😿")
        return False

    os.makedirs(os.path.dirname(doc_full_path), exist_ok=True)
    with open(doc_full_path + '.html', 'wt', encoding='utf-8') as f:
        f.write(rendered.getvalue())
    return True


def main(source_top, dest_top):
    """Generate the whole site once, then keep it updated on file changes.

    Watches ``source_top`` and STATIC_PATH with inotify and never returns on
    its own: each relevant event regenerates what it affects.

    * A change under STATIC_PATH reloads the template and recopies the static
      resources. Unless the changed file is itself one of STATIC_RESOURCES,
      every known document is rendered again as well.
    * A change to a document under ``source_top`` reloads and renders only
      that document. A removed document is dropped from the in-memory set;
      its generated page is left in place.

    Args:
        source_top: root of the tree containing the source documents.
        dest_top: root of the output tree.
    """
    notifier = inotify.adapters.InotifyTrees([source_top, STATIC_PATH])

    ## Initial load
    t0 = time.time()
    logging.info("Initial load...")
    docs = regen_all(source_top, dest_top)
    logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))

    ## Updating
    for event in notifier.event_gen(yield_nones=False):
        (_, type_names, directory, filename) = event
        if not any(name in MONITORED_EVENT_TYPES for name in type_names):
            continue
        filepath = os.path.join(directory, filename)
        # Absolute form: used both to classify the event and as the `docs`
        # key, so that it matches the keys produced by load_all().
        abs_path = os.path.abspath(filepath)

        # Compare on a path boundary so a sibling such as "static-old" is not
        # mistaken for the static directory.
        if abs_path == STATIC_PATH or abs_path.startswith(STATIC_PATH + os.sep):
            t0 = time.time()
            try:
                update_statics()
            except Exception:
                logging.error(traceback.format_exc())
                logging.error("Loading new templates failed 😿")
                continue

            is_static_resource = any(
                filename == os.path.basename(static[0])
                for static in STATIC_RESOURCES
            )

            try:
                _write_static_resources(dest_top)
                if not is_static_resource:
                    docs = regen_all(source_top, dest_top, docs)
            except OSError:
                # E.g. a resource is briefly missing while an editor replaces
                # it; keep watching instead of terminating.
                logging.error(traceback.format_exc())
                logging.error("Updating static resources failed 😿")
                continue

            if is_static_resource:
                logging.info("Updated static resources in {:.2f}s".format(time.time() - t0))
            else:
                logging.info("Updated all in {:.2f}s".format(time.time() - t0))
            continue

        # Same filter as load_all(): only known document types are handled,
        # which also discards directory events and editor temporary files.
        if 'IN_ISDIR' in type_names or not filename.endswith(tuple(EXTENSIONS)):
            continue

        if 'IN_DELETE' in type_names or 'IN_MOVED_FROM' in type_names:
            # The source is gone: there is nothing to load. Forget it so a
            # later full regeneration does not render it again.
            if docs.pop(abs_path, None) is not None:
                logging.info("Source removed, document forgotten: {}".format(filepath))
            continue

        try:
            (doc, front_matter, out_path) = load_doc(filepath)
        except Exception:
            logging.error(traceback.format_exc())
            logging.error("Skipping update 😿")
            continue

        t0 = time.time()
        docs[abs_path] = (doc, front_matter, out_path)
        if _write_article(doc, front_matter, os.path.join(dest_top, out_path)):
            logging.info("Updated all in {:.2f}s".format(time.time() - t0))


# Script entry point: expects exactly two arguments, SOURCE_TOP and DEST_TOP.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Wrong invocation: report it on stderr and exit with a failure
        # status. sys.exit is used because the bare `exit` helper is provided
        # by the `site` module for interactive sessions.
        print("Usage: {} SOURCE_TOP DEST_TOP".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    main(sys.argv[1], sys.argv[2])