Implement RSS generation.

2023-09-21 00:16:56 +02:00 · 2023-09-21 00:16:56 +02:00 · 5b0873b0bd
commit 5b0873b0bd
parent fc1a94cfcf
2 changed files with 75 additions and 1 deletions
--- a/scripts/blog.py
+++ b/scripts/blog.py
@ -33,6 +33,7 @@ import markdown
 from unidecode import unidecode
 # NOTE(review): presumably the size budget applied by summarize() when
 # cutting a post down to a teaser -- confirm against its implementation.
 SUMMARIZE_MAX_TOKENS = 1000
 # Upper bound on the number of posts written to the RSS feed by render_rss().
 ITEMS_IN_RSS = 50
 # Matches timestamps shaped like "DD.MM.YYYY, H:MM" (hour may have one or two
 # digits) and captures, in order: day, month, year, hour, minute.
 NIKOLA_DATE_RE = re.compile(r'^([0-2]\d|30|31)\.(0\d|1[012])\.(\d{4}), (\d{1,2}):(\d{2})$')
@ -48,6 +49,7 @@ ARTICLE_TEMPLATE_NAME = 'article.tmpl.html'
 # File names of the templates; update_statics() resolves the category-list,
 # article-list and RSS names through JINJA_ENV.get_template().
 BLOG_INDEX_TEMPLATE_NAME = 'blog_index.tmpl.html'
 CATEGORY_LIST_TEMPLATE_NAME = 'category_list.tmpl.html'
 ARTICLE_LIST_TEMPLATE_NAME = 'article_list.tmpl.html'
 RSS_TEMPLATE_NAME = 'rss.tmpl.xml'
 # NOTE(review): presumably the number of posts per blog index page -- confirm
 # against the index rendering code.
 BLOG_INDEX_PAGE_SIZE = 10
 STATIC_RESOURCES = (
@ -70,6 +72,8 @@ def update_statics():
    CATEGORY_LIST_TEMPLATE = JINJA_ENV.get_template(CATEGORY_LIST_TEMPLATE_NAME)
    global ARTICLE_LIST_TEMPLATE
    ARTICLE_LIST_TEMPLATE = JINJA_ENV.get_template(ARTICLE_LIST_TEMPLATE_NAME)
    global RSS_TEMPLATE
    RSS_TEMPLATE = JINJA_ENV.get_template(RSS_TEMPLATE_NAME)
 update_statics()
@ -283,7 +287,9 @@ def summarize(doc):
        assert teaser_end is not None, 'Error finding teaser end on copy'
        cut_after_element(teaser_end)
-        result = summary
+        result = bs4()
        for child in summary.children:
            result.append(child)
    # Update summary links and hrefs
    for v in result.find_all('video') + result.find_all('image'):
@ -410,6 +416,49 @@ def render_archive(docs, dest_top):
    with open(path, 'wt') as f:
        f.write(result)
 def render_rss(docs, dest_top):
    """Render the RSS feed of the most recent posts to ``<dest_top>/rss.xml``.

    When several documents share a slug (one per language), only the ones
    written in the highest-priority language available for that slug are
    kept, so a translated post shows up once in the feed.

    Args:
        docs: Mapping whose values are ``(doc, front_matter, out_path)``
            tuples. ``front_matter`` must provide ``slug``, ``title``,
            ``date`` and ``tags``; ``lang`` is optional and defaults to
            ``LANG_PRIORITY[0]``.
        dest_top: Directory under which ``rss.xml`` is written.

    Raises:
        ValueError: If a document declares a language that is not listed
            in ``LANG_PRIORITY``.
    """
    default_lang = LANG_PRIORITY[0]

    def lang_rank(front_matter):
        # Lower rank means higher priority.
        return LANG_PRIORITY.index(front_matter.get('lang', default_lang))

    # Best (lowest) language rank available for each slug, computed once
    # instead of once per document.
    best_rank_by_slug = {}
    for (_doc, front_matter, _out_path) in docs.values():
        slug = front_matter['slug']
        rank = lang_rank(front_matter)
        if rank < best_rank_by_slug.get(slug, len(LANG_PRIORITY)):
            best_rank_by_slug[slug] = rank

    # Drop the versions of a post written in lower-priority languages
    selected_docs = [
        entry
        for entry in docs.values()
        if lang_rank(entry[1]) == best_rank_by_slug[entry[1]['slug']]
    ]

    # Newest first; the sort is stable, so ties keep their original order
    selected_docs.sort(key=lambda entry: entry[1]['date'], reverse=True)

    posts = [
        {
            "title": front_matter['title'],
            "post_publication_date": front_matter['date'],
            "post_tags": split_tags(front_matter['tags']),
            "summary": summarize(doc),
            "link": out_path.rstrip('/') + '/',
        }
        for (doc, front_matter, out_path) in selected_docs[:ITEMS_IN_RSS]
    ]

    result = RSS_TEMPLATE.render(
        posts=posts,
        # Timezone-aware on purpose: the template formats this value with
        # "%z", which renders as an empty string for naive datetimes.
        last_build_date=datetime.datetime.now(datetime.timezone.utc),
    )

    path = os.path.join(dest_top, "rss.xml")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The template declares UTF-8 and contains non-ASCII text, so do not
    # depend on the platform's default encoding.
    with open(path, 'wt', encoding='utf-8') as f:
        f.write(result)
 def regen_all(source_top, dest_top, docs=None):
    if docs is None:
@ -456,6 +505,9 @@ def regen_all(source_top, dest_top, docs=None):
    # Render archive
    render_archive(docs, dest_top)
    # Render RSS
    render_rss(docs, dest_top)
    return docs
--- a/static/rss.tmpl.xml
+++ b/static/rss.tmpl.xml
@ -0,0 +1,22 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Código para llevar</title>
    <link>https://codigoparallevar.com/blog/</link>
    <description>Blog from a programmer adrift.</description>
    <atom:link href="https://codigoparallevar.com/blog/rss.xml" rel="self" type="application/rss+xml"></atom:link>
    <language>en</language>
    <copyright>Contents © 2023 kenkeiras - Creative Commons License 4.0 BY-NC-SA</copyright>
    <lastBuildDate>{{ last_build_date.strftime("%a, %d %b %Y %H:%M:%S %z") }}</lastBuildDate>
    <ttl>3600</ttl>
    {% for post in posts %}
    <item>
      <title>{{ post.title }}</title>
      <description>{{ post.summary }}</description>
      <link>https://codigoparallevar.com/blog/{{ post.link }}</link>
      <pubDate>{{ post.post_publication_date.strftime("%a, %d %b %Y %H:%M:%S %z") }}</pubDate>
    </item>
    {% endfor %}
  </channel>
 </rss>