Improve link checking.

Re-render archives on file changes.
Fix directory creation on re-render.
2023-10-03 00:09:30 +02:00 · 2023-10-03 00:09:30 +02:00 · 2023-10-03 00:09:30 +02:00 · 2023-10-03 00:09:30 +02:00 · 2023-10-03 00:09:30 +02:00 · 2023-10-03 00:07:45 +02:00
7 changed files with 43 additions and 30 deletions
--- a/scripts/blog.py
+++ b/scripts/blog.py
@ -63,6 +63,10 @@ JINJA_ENV = jinja2.Environment(
    autoescape=jinja2.select_autoescape()
 )
 WATCH = True
 if os.getenv('WATCH_AND_REBUILD', '1') == '0':
    WATCH = False
 def update_statics():
    global ARTICLE_TEMPLATE
    ARTICLE_TEMPLATE = JINJA_ENV.get_template(ARTICLE_TEMPLATE_NAME)
@ -122,6 +126,7 @@ def slugify(title):
    slug = unidecode(title).lower()
    slug = SLUG_REMOVE_RE.sub('', slug)
    slug = SLUG_HYPHENATE_RE.sub('-', slug)
    slug = slug.strip('-')
    return slug.strip()
@ -167,7 +172,7 @@ def get_out_path(front_matter):
    out_path = os.path.join(str(front_matter['date'].year), front_matter['slug'])
    if front_matter.get('lang', LANG_PRIORITY[0]) != LANG_PRIORITY[0]:
-        out_path = os.path.join(str(front_matter['date'].year), front_matter['lang'], front_matter['slug'])
+        out_path = os.path.join(front_matter['lang'], str(front_matter['date'].year), front_matter['slug'])
    return out_path
@ -291,15 +296,6 @@ def summarize(doc):
        for child in summary.children:
            result.append(child)
    # Update summary links and hrefs
    for v in result.find_all('video') + result.find_all('image'):
        if 'src' in v.attrs and ':' not in v['src']:
            v['src'] = '/blog/' + v['src'].lstrip('/')
    for v in result.find_all('a'):
        if 'href' in v.attrs and ':' not in v['href']:
            v['href'] = '/blog/' + v['href'].lstrip('/')
    return result
 def render_index(docs, dest_top):
@ -388,7 +384,7 @@ def render_categories(docs, dest_top):
        result = CATEGORY_LIST_TEMPLATE.render(
            posts=posts,
        )
-        path = os.path.join(dest_top, "tags", tag, "index.html")
+        path = os.path.join(dest_top, "tags", tag.replace('/', '_'), "index.html")
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wt') as f:
            f.write(result)
@ -520,6 +516,10 @@ def main(source_top, dest_top):
    docs = regen_all(source_top, dest_top)
    logging.info("Initial load completed in {:.2f}s".format(time.time() - t0))
    if not WATCH:
        logging.info("Build completed in {:.2f}s".format(time.time() - t0))
        return 0
    ## Updating
    for event in notifier.event_gen(yield_nones=False):
        (ev, types, directory, file) = event
@ -573,11 +573,12 @@ def main(source_top, dest_top):
            docs[filepath] = (doc, front_matter, out_path)
            doc_full_path = os.path.join(dest_top, out_path)
            print("Updated: {}.html".format(doc_full_path))
-            os.makedirs(os.path.dirname(doc_full_path), exist_ok=True)
+            os.makedirs(os.path.dirname(doc_full_path + '/index.html'), exist_ok=True)
            # print("==", doc_full_path)
            with open(doc_full_path + '/index.html', 'wt') as f:
                try:
                    render_article(doc, front_matter, f, out_path)
                    render_archive(docs, dest_top)
                except:
                    logging.error(traceback.format_exc())
                    logging.error("Rendering failed 😿")
--- a/scripts/test-links.py
+++ b/scripts/test-links.py
@ -3,6 +3,7 @@
 import logging
 import os
 import sys
 import urllib.parse
 from bs4 import BeautifulSoup as bs4
@ -19,25 +20,32 @@ def main(files_top):
                print("\r{} files".format(len(found_files)), end='', flush=True)
    print()
-    found_broken = False
+    found_broken = 0
    for fpath in tqdm(found_files):
        with open(fpath) as f:
            tree = bs4(f.read(), features='lxml', parser='html5')
-        for link in tree.find_all('a'):
+
-            if 'href' not in link.attrs:
+        for tag, attr in [('a', 'href'), ('img', 'src'), ('audio', 'src'), ('video', 'src')]:
            for link in tree.find_all(tag):
                if attr not in link.attrs:
                    continue
-            if ':' in link['href']:
+                link.attrs[attr] = link.attrs[attr].split('#')[0]
                if not link.attrs[attr]:
                    continue
-            if link['href'].startswith('/'):
+                if ':' in link[attr]:
-                target = link['href']  # TODO: Find a better way to model the root
+                    continue
                if link[attr].startswith('/'):
                    target = os.path.join(os.path.abspath(files_top), urllib.parse.unquote(link[attr].lstrip('/')))
                else:
-                target = os.path.join(os.path.dirname(fpath), link['href'])
+                    target = os.path.join(os.path.dirname(fpath), urllib.parse.unquote(link[attr]))
                if os.path.isdir(target):
                    pass
                elif not os.path.exists(target):
-                print("[{}] -[ error ]-> {} | {}".format(fpath, target, link['href']))
+                    print("[{}] -[ error ]-> {} | {}".format(fpath, target, link[attr]))
                    found_broken += 1
    if found_broken:
        print(f"Found {found_broken} broken links")
        exit(1)
    else:
        exit(0)
--- a/scripts/upload.sh
+++ b/scripts/upload.sh
@ -13,11 +13,15 @@ cd ../scripts
 rm -Rf ../_gen/notes
 WATCH_AND_REBUILD=0 python3 generate.py ~/.logs/brain ../_gen/notes
 rm -Rf ../_gen/blog
 WATCH_AND_REBUILD=0 python3 blog.py ~/cloud/nextcloud/blog/posts/ ../_gen/blog
 # Upload notes
 cd ../_gen
 rsync -HPaz static/ --delete-after --exclude='*.html' root@codigoparallevar.com:/mnt/vols/misc/codigoparallevar/static/
 rsync -HPaz notes/ --delete-after --exclude='xapian' --exclude='*.sqlite3' root@codigoparallevar.com:/mnt/vols/misc/codigoparallevar/notes/
 rsync -HPaz notes/db.sqlite3 root@codigoparallevar.com:/mnt/vols/misc/codigoparallevar-api/
 rsync -HPaz blog/ --delete-after --exclude='xapian' --exclude='*.sqlite3' root@codigoparallevar.com:/mnt/vols/misc/codigoparallevar/blog/
 # Restart API server
 ssh root@codigoparallevar.com docker restart notes-api-server
--- a/static/article.tmpl.html
+++ b/static/article.tmpl.html
@ -38,7 +38,7 @@
            </time>
            <ul class="post-tags">
              {% for post_tag in post_tags %}
-              <li class="post-tag"><a href="{{ base_path }}/tags/{{ post_tag }}/"</a>{{ post_tag }}</a></li>
+              <li class="post-tag"><a href="{{ base_path }}/tags/{{ post_tag |urlencode|replace('/', '_') }}/">{{ post_tag }}</a></li>
              {% endfor %}
            </ul>
          </div>
--- a/static/article_list.tmpl.html
+++ b/static/article_list.tmpl.html
@ -42,7 +42,7 @@
            <div class="post-metadata">
              <ul class="post-tags">
                {% for post_tag in post.post_tags %}
-                  <li class="post-tag"><a href="../tags/{{ post_tag }}/"</a>{{ post_tag }}</a></li>
+                  <li class="post-tag"><a href="../tags/{{ post_tag |urlencode|replace('/', '_') }}/">{{ post_tag }}</a></li>
                {% endfor %}
              </ul>
            </div>
--- a/static/blog_index.tmpl.html
+++ b/static/blog_index.tmpl.html
@ -40,7 +40,7 @@
              </time>
              <ul class="post-tags">
                {% for post_tag in post.post_tags %}
-                <li class="post-tag"><a href="tags/{{ post_tag }}/"</a>{{ post_tag }}</a></li>
+                <li class="post-tag"><a href="tags/{{ post_tag |urlencode|replace('/', '_') }}/">{{ post_tag }}</a></li>
                {% endfor %}
              </ul>
            </div>
--- a/static/category_list.tmpl.html
+++ b/static/category_list.tmpl.html
@ -42,7 +42,7 @@
            <div class="post-metadata">
              <ul class="post-tags">
                {% for post_tag in post.post_tags %}
-                  <li class="post-tag"><a href="../../tags/{{ post_tag }}/"</a>{{ post_tag }}</a></li>
+                  <li class="post-tag"><a href="../../tags/{{ post_tag |urlencode|replace('/', '_') }}/">{{ post_tag }}</a></li>
                {% endfor %}
              </ul>
            </div>
Author	SHA1	Message	Date
Sergio Martínez Portela	23f8fcefe5	Improve link checking.	2023-10-03 00:09:30 +02:00
Sergio Martínez Portela	600e737767	Re-render archives on file changes.	2023-10-03 00:09:30 +02:00
Sergio Martínez Portela	c588187ae3	Fix directory creation on re-render.	2023-10-03 00:09:30 +02:00
Sergio Martínez Portela	bd644e3788	No need to re-link videos or hrefs.	2023-10-03 00:09:30 +02:00
Sergio Martínez Portela	650b16df32	Add no-watch build on blog.	2023-10-03 00:09:30 +02:00
Sergio Martínez Portela	abfd4b16c5	Fix blog linking.	2023-10-03 00:07:45 +02:00