Implement RSS generation.

This commit is contained in:
Sergio Martínez Portela 2023-09-21 00:16:56 +02:00
parent fc1a94cfcf
commit 5b0873b0bd
2 changed files with 75 additions and 1 deletions

View File

@ -33,6 +33,7 @@ import markdown
from unidecode import unidecode from unidecode import unidecode
SUMMARIZE_MAX_TOKENS = 1000 SUMMARIZE_MAX_TOKENS = 1000
ITEMS_IN_RSS = 50
NIKOLA_DATE_RE = re.compile(r'^([0-2]\d|30|31)\.(0\d|1[012])\.(\d{4}), (\d{1,2}):(\d{2})$') NIKOLA_DATE_RE = re.compile(r'^([0-2]\d|30|31)\.(0\d|1[012])\.(\d{4}), (\d{1,2}):(\d{2})$')
@ -48,6 +49,7 @@ ARTICLE_TEMPLATE_NAME = 'article.tmpl.html'
BLOG_INDEX_TEMPLATE_NAME = 'blog_index.tmpl.html' BLOG_INDEX_TEMPLATE_NAME = 'blog_index.tmpl.html'
CATEGORY_LIST_TEMPLATE_NAME = 'category_list.tmpl.html' CATEGORY_LIST_TEMPLATE_NAME = 'category_list.tmpl.html'
ARTICLE_LIST_TEMPLATE_NAME = 'article_list.tmpl.html' ARTICLE_LIST_TEMPLATE_NAME = 'article_list.tmpl.html'
RSS_TEMPLATE_NAME = 'rss.tmpl.xml'
BLOG_INDEX_PAGE_SIZE = 10 BLOG_INDEX_PAGE_SIZE = 10
STATIC_RESOURCES = ( STATIC_RESOURCES = (
@ -70,6 +72,8 @@ def update_statics():
CATEGORY_LIST_TEMPLATE = JINJA_ENV.get_template(CATEGORY_LIST_TEMPLATE_NAME) CATEGORY_LIST_TEMPLATE = JINJA_ENV.get_template(CATEGORY_LIST_TEMPLATE_NAME)
global ARTICLE_LIST_TEMPLATE global ARTICLE_LIST_TEMPLATE
ARTICLE_LIST_TEMPLATE = JINJA_ENV.get_template(ARTICLE_LIST_TEMPLATE_NAME) ARTICLE_LIST_TEMPLATE = JINJA_ENV.get_template(ARTICLE_LIST_TEMPLATE_NAME)
global RSS_TEMPLATE
RSS_TEMPLATE = JINJA_ENV.get_template(RSS_TEMPLATE_NAME)
update_statics() update_statics()
@ -283,7 +287,9 @@ def summarize(doc):
assert teaser_end is not None, 'Error finding teaser end on copy' assert teaser_end is not None, 'Error finding teaser end on copy'
cut_after_element(teaser_end) cut_after_element(teaser_end)
result = summary result = bs4()
for child in summary.children:
result.append(child)
# Update summary links and hrefs # Update summary links and hrefs
for v in result.find_all('video') + result.find_all('image'): for v in result.find_all('video') + result.find_all('image'):
@ -410,6 +416,49 @@ def render_archive(docs, dest_top):
with open(path, 'wt') as f: with open(path, 'wt') as f:
f.write(result) f.write(result)
def render_rss(docs, dest_top):
    """Render the blog's RSS feed to ``rss.xml`` inside ``dest_top``.

    When several documents share the same front-matter ``slug`` (one per
    language), only those written in the language that appears earliest in
    ``LANG_PRIORITY`` are kept. The kept documents are sorted by their
    front-matter ``date`` (newest first) and the first ``ITEMS_IN_RSS`` of
    them are handed to ``RSS_TEMPLATE``.

    Args:
        docs: Mapping whose values are ``(doc, front_matter, out_path)``
            tuples. ``front_matter`` must provide ``slug``, ``title``,
            ``date`` and ``tags``; ``lang`` is optional and defaults to
            ``LANG_PRIORITY[0]``.
        dest_top: Directory in which ``rss.xml`` is written.

    Raises:
        ValueError: If a document declares a ``lang`` that is not listed in
            ``LANG_PRIORITY``.
    """
    default_lang = LANG_PRIORITY[0]

    def lang_rank(front_matter):
        # Lower rank == more preferred language.
        return LANG_PRIORITY.index(front_matter.get('lang', default_lang))

    # Best (lowest) language rank available for every slug.
    best_rank_by_slug = {}
    for (_doc, front_matter, _out_path) in docs.values():
        slug = front_matter['slug']
        rank = lang_rank(front_matter)
        if slug not in best_rank_by_slug or rank < best_rank_by_slug[slug]:
            best_rank_by_slug[slug] = rank

    # Drop the versions of a post written in a lower-priority language.
    selected_docs = [
        (doc, front_matter, out_path)
        for (doc, front_matter, out_path) in docs.values()
        if lang_rank(front_matter) == best_rank_by_slug[front_matter['slug']]
    ]
    selected_docs.sort(key=lambda entry: entry[1]['date'], reverse=True)

    posts = [
        {
            "title": front_matter['title'],
            "post_publication_date": front_matter['date'],
            "post_tags": split_tags(front_matter['tags']),
            "summary": summarize(doc),
            # Normalize to exactly one trailing slash.
            "link": out_path.rstrip('/') + '/',
        }
        for (doc, front_matter, out_path) in selected_docs[:ITEMS_IN_RSS]
    ]

    result = RSS_TEMPLATE.render(
        posts=posts,
        # Timezone-aware on purpose: the template formats this value with
        # "%z", which renders as an empty string for naive datetimes and
        # would leave <lastBuildDate> without a timezone.
        last_build_date=datetime.datetime.now(datetime.timezone.utc),
    )

    path = os.path.join(dest_top, "rss.xml")
    dest_dir = os.path.dirname(path)
    if dest_dir:
        # os.makedirs('') raises, so skip it when writing to the CWD.
        os.makedirs(dest_dir, exist_ok=True)
    # The template declares encoding="UTF-8" and contains non-ASCII text,
    # so do not depend on the locale's default encoding.
    with open(path, 'wt', encoding='utf-8') as f:
        f.write(result)
def regen_all(source_top, dest_top, docs=None): def regen_all(source_top, dest_top, docs=None):
if docs is None: if docs is None:
@ -456,6 +505,9 @@ def regen_all(source_top, dest_top, docs=None):
# Render archive # Render archive
render_archive(docs, dest_top) render_archive(docs, dest_top)
# Render RSS
render_rss(docs, dest_top)
return docs return docs

22
static/rss.tmpl.xml Normal file
View File

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    {# RSS 2.0 feed template. Context: `posts` (items exposing title, summary, link and post_publication_date) and `last_build_date` (a datetime). #}
    <title>Código para llevar</title>
    <link>https://codigoparallevar.com/blog/</link>
    <description>Blog from a programmer adrift.</description>
    <atom:link href="https://codigoparallevar.com/blog/rss.xml" rel="self" type="application/rss+xml"></atom:link>
    <language>en</language>
    <copyright>Contents © 2023 kenkeiras - Creative Commons License 4.0 BY-NC-SA</copyright>
    {# NOTE(review): "%z" renders as an empty string for naive datetimes; both dates below should be timezone-aware to produce valid RFC 822 dates. #}
    <lastBuildDate>{{ last_build_date.strftime("%a, %d %b %Y %H:%M:%S %z") }}</lastBuildDate>
    {# NOTE(review): RSS 2.0 defines <ttl> in minutes, so 3600 means 60 hours - confirm this is intended. #}
    <ttl>3600</ttl>
    {# Values are escaped explicitly so that "&" or "<" in a title, or the HTML markup of a summary, cannot break the XML. `| e` is a no-op on already-escaped values, so this is safe whether or not autoescape is enabled. #}
    {% for post in posts %}
    <item>
      <title>{{ post.title | e }}</title>
      <description>{{ post.summary | e }}</description>
      <link>https://codigoparallevar.com/blog/{{ post.link | e }}</link>
      <pubDate>{{ post.post_publication_date.strftime("%a, %d %b %Y %H:%M:%S %z") }}</pubDate>
    </item>
    {% endfor %}
  </channel>
</rss>