wip: Script to test links for broken ones.
This commit is contained in:
parent
5b0873b0bd
commit
816cedea4d
51
scripts/test-links.py
Normal file
51
scripts/test-links.py
Normal file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from bs4 import BeautifulSoup as bs4
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
def _find_html_files(files_top):
    """Return the paths of every ``.html`` file found below *files_top*.

    A running count is printed on a single line while the tree is walked.
    """
    found_files = []
    for root, _dirs, files in os.walk(files_top):
        for name in files:
            if name.endswith('.html'):
                found_files.append(os.path.join(root, name))
                print("\r{} files".format(len(found_files)), end='', flush=True)
    return found_files


def _resolve_target(files_top, fpath, href):
    """Map *href*, found in the file *fpath*, to a filesystem path.

    Returns ``None`` for links this script does not check: anything
    containing a colon (crude detection of ``http:``, ``mailto:`` and other
    schemes), protocol-relative ``//host/...`` links, and links that consist
    only of a fragment and/or query string.
    """
    from urllib.parse import unquote

    if ':' in href or href.startswith('//'):
        return None

    # Only the path component names a file; drop "#fragment" and "?query".
    path = href.partition('#')[0].partition('?')[0]
    if not path:
        return None

    # File names on disk are not percent-encoded.
    path = unquote(path)

    if path.startswith('/'):
        # Site-absolute link: files_top is treated as the document root.
        # The leading slashes must go, otherwise os.path.join would discard
        # files_top and test against the real filesystem root.
        return os.path.join(files_top, path.lstrip('/'))
    return os.path.join(os.path.dirname(fpath), path)


def main(files_top):
    """Check the local links of every HTML file below *files_top*.

    Each ``<a href=...>`` that resolves to a local path is tested for
    existence on disk (an existing directory counts as a valid target).
    Every broken link is reported as
    ``[source file] -[ error ]-> resolved path | original href``.

    Args:
        files_top: Directory to scan; also used as the root for hrefs that
            start with ``/``.

    Returns:
        ``1`` if at least one broken link was found, ``0`` otherwise. The
        value is meant to be used as the process exit status.
    """
    print("Listing files...")
    found_files = _find_html_files(files_top)
    print()

    found_broken = False
    for fpath in tqdm(found_files):
        # Read raw bytes so BeautifulSoup works out the document encoding
        # itself instead of relying on the locale's default text encoding.
        with open(fpath, 'rb') as f:
            tree = bs4(f.read(), features='lxml')

        for link in tree.find_all('a'):
            href = link.attrs.get('href')
            if href is None:
                continue

            target = _resolve_target(files_top, fpath, href)
            if target is None or os.path.exists(target):
                continue

            found_broken = True
            # tqdm.write keeps the message from garbling the progress bar.
            tqdm.write("[{}] -[ error ]-> {} | {}".format(fpath, target, href))

    return 1 if found_broken else 0
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one argument is expected: the directory to scan.
    if len(sys.argv) != 2:
        # A wrong invocation is an error: report it on stderr and exit with
        # a non-zero status so calling scripts do not mistake it for success.
        print("Usage: {} FILES_TOP".format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)

    logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
    # sys.exit is used instead of the `exit` helper, which is only injected
    # by the `site` module and is missing under e.g. `python -S`.
    sys.exit(main(sys.argv[1]))
|
Loading…
Reference in New Issue
Block a user