Implement RSS generation.

2023-09-21 00:16:56 +02:00 · 2023-09-21 00:16:56 +02:00 · 5b0873b0bd
commit 5b0873b0bd
parent fc1a94cfcf
2 changed files with 75 additions and 1 deletions
--- a/scripts/blog.py
+++ b/scripts/blog.py
@ -33,6 +33,7 @@ import markdown
 from unidecode import unidecode

 SUMMARIZE_MAX_TOKENS = 1000
+ITEMS_IN_RSS = 50

 NIKOLA_DATE_RE = re.compile(r'^([0-2]\d|30|31)\.(0\d|1[012])\.(\d{4}), (\d{1,2}):(\d{2})$')

@ -48,6 +49,7 @@ ARTICLE_TEMPLATE_NAME = 'article.tmpl.html'
 BLOG_INDEX_TEMPLATE_NAME = 'blog_index.tmpl.html'
 CATEGORY_LIST_TEMPLATE_NAME = 'category_list.tmpl.html'
 ARTICLE_LIST_TEMPLATE_NAME = 'article_list.tmpl.html'
+RSS_TEMPLATE_NAME = 'rss.tmpl.xml'
 BLOG_INDEX_PAGE_SIZE = 10

 STATIC_RESOURCES = (
@ -70,6 +72,8 @@ def update_statics():
    CATEGORY_LIST_TEMPLATE = JINJA_ENV.get_template(CATEGORY_LIST_TEMPLATE_NAME)
    global ARTICLE_LIST_TEMPLATE
    ARTICLE_LIST_TEMPLATE = JINJA_ENV.get_template(ARTICLE_LIST_TEMPLATE_NAME)
+    global RSS_TEMPLATE
+    RSS_TEMPLATE = JINJA_ENV.get_template(RSS_TEMPLATE_NAME)

 update_statics()

@ -283,7 +287,9 @@ def summarize(doc):
        assert teaser_end is not None, 'Error finding teaser end on copy'

        cut_after_element(teaser_end)
-        result = summary
+        result = bs4()
+        for child in summary.children:
+            result.append(child)

    # Update summary links and hrefs
    for v in result.find_all('video') + result.find_all('image'):
@ -410,6 +416,49 @@ def render_archive(docs, dest_top):
    with open(path, 'wt') as f:
        f.write(result)

+def render_rss(docs, dest_top):
+    """Render the most recent posts into ``rss.xml`` inside ``dest_top``.
+
+    ``docs`` is a mapping whose values are ``(doc, front_matter, out_path)``
+    tuples. When several documents share a slug, only those written in the
+    language that ranks first in ``LANG_PRIORITY`` are kept (a document
+    without a ``lang`` key counts as ``LANG_PRIORITY[0]``). The survivors are
+    sorted newest-first by ``front_matter['date']`` and truncated to
+    ``ITEMS_IN_RSS`` entries before being passed to ``RSS_TEMPLATE``.
+
+    Raises ``KeyError`` if a front matter lacks ``slug``, ``title``, ``date``
+    or ``tags``; the ``LANG_PRIORITY.index`` lookup fails for a language that
+    is not listed there.
+    """
+    default_lang = LANG_PRIORITY[0]
+
+    def lang_rank(front_matter):
+        # Lower rank means a more preferred language
+        return LANG_PRIORITY.index(front_matter.get('lang', default_lang))
+
+    # Find the best language rank available for every slug
+    best_rank_by_slug = {}
+    for (_doc, front_matter, _out_path) in docs.values():
+        slug = front_matter['slug']
+        rank = lang_rank(front_matter)
+        best = best_rank_by_slug.get(slug)
+        if best is None or rank < best:
+            best_rank_by_slug[slug] = rank
+
+    # Drop the versions written in a lower-priority language
+    selected_docs = [
+        (doc, front_matter, out_path)
+        for (doc, front_matter, out_path) in docs.values()
+        if lang_rank(front_matter) == best_rank_by_slug[front_matter['slug']]
+    ]
+
+    newest_first = sorted(selected_docs, key=lambda x: x[1]['date'], reverse=True)
+
+    posts = [
+        {
+            "title": front_matter['title'],
+            "post_publication_date": front_matter['date'],
+            "post_tags": split_tags(front_matter['tags']),
+            "summary": summarize(doc),
+            # Normalize to exactly one trailing slash
+            "link": out_path.rstrip('/') + '/',
+        }
+        for (doc, front_matter, out_path) in newest_first[:ITEMS_IN_RSS]
+    ]
+
+    result = RSS_TEMPLATE.render(
+        posts=posts,
+        # Timezone-aware on purpose: the template formats this value with
+        # ``%z``, which expands to an empty string for naive datetimes.
+        last_build_date=datetime.datetime.now(datetime.timezone.utc),
+    )
+    path = os.path.join(dest_top, "rss.xml")
+    out_dir = os.path.dirname(path)
+    if out_dir:
+        # An empty dirname means the current directory; makedirs('') raises
+        os.makedirs(out_dir, exist_ok=True)
+    # The template declares encoding="UTF-8", so do not rely on the locale
+    with open(path, 'wt', encoding='utf-8') as f:
+        f.write(result)
+

 def regen_all(source_top, dest_top, docs=None):
    if docs is None:
@ -456,6 +505,9 @@ def regen_all(source_top, dest_top, docs=None):
    # Render archive
    render_archive(docs, dest_top)

+    # Render RSS
+    render_rss(docs, dest_top)
+
    return docs


--- a/static/rss.tmpl.xml
+++ b/static/rss.tmpl.xml
@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+{# RSS 2.0 feed template. Rendered with `posts` (a list of mappings with
+   `title`, `summary`, `link` and `post_publication_date`) and
+   `last_build_date`. #}
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
+  <channel>
+    <title>Código para llevar</title>
+    <link>https://codigoparallevar.com/blog/</link>
+    <description>Blog from a programmer adrift.</description>
+    <atom:link href="https://codigoparallevar.com/blog/rss.xml" rel="self" type="application/rss+xml"></atom:link>
+    <language>en</language>
+    <copyright>Contents © 2023 kenkeiras - Creative Commons License 4.0 BY-NC-SA</copyright>
+    {# %z expands to an empty string when the datetime is naive, so the
+       values formatted below should be timezone-aware. #}
+    <lastBuildDate>{{ last_build_date.strftime("%a, %d %b %Y %H:%M:%S %z") }}</lastBuildDate>
+    {# NOTE(review): RSS 2.0 defines <ttl> in minutes, so 3600 means 60 hours;
+       confirm this was not meant to be 60. #}
+    <ttl>3600</ttl>
+
+    {% for post in posts %}
+    <item>
+      {# Escape explicitly so the feed stays well-formed XML whether or not
+         the Jinja environment has autoescape enabled for this template. #}
+      <title>{{ post.title|e }}</title>
+      <description>{{ post.summary|e }}</description>
+      <link>https://codigoparallevar.com/blog/{{ post.link }}</link>
+      <pubDate>{{ post.post_publication_date.strftime("%a, %d %b %Y %H:%M:%S %z") }}</pubDate>
+    </item>
+    {% endfor %}
+  </channel>
+</rss>