wip: Script to test links for broken ones.

This commit is contained in:
Sergio Martínez Portela 2023-09-22 00:05:24 +02:00
parent 5b0873b0bd
commit 816cedea4d

51
scripts/test-links.py Normal file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
import logging
import os
import sys
from bs4 import BeautifulSoup as bs4
from tqdm import tqdm
def main(files_top):
    """Check the local links of every HTML file under *files_top*.

    Walks ``files_top`` recursively, parses each ``.html`` file and, for
    every ``<a>`` element with an ``href`` that does not contain a ``:``
    (which skips URLs carrying a scheme such as ``https:`` or ``mailto:``),
    checks that the link target exists on the filesystem. Each broken link
    is printed to standard output.

    Args:
        files_top: Directory to scan for ``.html`` files.

    Raises:
        SystemExit: Once the scan completes, with status 1 if at least one
            broken link was found and 0 otherwise.
    """
    print("Listing files...")
    found_files = []
    for root, _dirs, files in os.walk(files_top):
        for name in files:
            if name.endswith('.html'):
                found_files.append(os.path.join(root, name))
                # Rewrite the same terminal line to show discovery progress.
                print("\r{} files".format(len(found_files)), end='', flush=True)
    print()

    found_broken = False
    for fpath in tqdm(found_files):
        with open(fpath) as f:
            # NOTE(review): `parser='html5'` does not appear to be a
            # documented BeautifulSoup argument; confirm it is intended
            # alongside features='lxml'.
            tree = bs4(f.read(), features='lxml', parser='html5')

        base_dir = os.path.dirname(fpath)
        for link in tree.find_all('a'):
            href = link.get('href')
            if href is None:
                continue
            if ':' in href:
                # Contains a scheme separator: treated as an external link.
                continue

            # The fragment and query string address content inside the
            # target document, not the file itself, so drop them before
            # touching the filesystem.
            path = href.partition('#')[0].partition('?')[0]
            if not path:
                # Pure "#fragment" / "?query" link: points at this very file.
                continue

            if path.startswith('/'):
                target = path  # TODO: Find a better way to model the root
            else:
                target = os.path.join(base_dir, path)

            # Directories count as valid targets; anything else must exist.
            if not os.path.isdir(target) and not os.path.exists(target):
                print("[{}] -[ error ]-> {} | {}".format(fpath, target, href))
                # Remember the failure so the exit status reflects it.
                found_broken = True

    sys.exit(1 if found_broken else 0)
if __name__ == "__main__":
    # Exactly one argument is expected: the directory to scan.
    if len(sys.argv) != 2:
        print("Usage: {} FILES_TOP".format(sys.argv[0]), file=sys.stderr)
        # A bad invocation is an error; report it with a non-zero status so
        # calling scripts and CI jobs do not mistake it for a clean run.
        sys.exit(2)
    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    sys.exit(main(sys.argv[1]))