Keep archived file names consistent.

This commit is contained in:
Sergio Martínez Portela 2021-12-17 09:13:09 +01:00
parent 2e83846028
commit db86d2686a

View File

@@ -58,12 +58,15 @@ def get_parser():
return parser return parser
def get_extension(path): def get_filename(path):
return (path return (path
.split('?')[0]
.split('/')[-1] .split('/')[-1]
.split('\\')[-1] .split('\\')[-1])
.split('.')[-1])
def get_extension(path):
return (get_filename(path)
.split('.')[-1])
def request(url): def request(url):
req = urllib.request.Request( req = urllib.request.Request(
@@ -84,7 +87,6 @@ def show_error(e, href=None):
def archive(content, base_url, selector, directory, attribute, progbar): def archive(content, base_url, selector, directory, attribute, progbar):
os.makedirs(directory, exist_ok=True)
for part in content.find_all(**selector): for part in content.find_all(**selector):
if attribute not in part.attrs: if attribute not in part.attrs:
continue continue
@@ -97,10 +99,11 @@ def archive(content, base_url, selector, directory, attribute, progbar):
progbar.next_iter(href) progbar.next_iter(href)
name = (hashlib.sha1(href.encode()).hexdigest() name = (hashlib.sha1(href.encode()).hexdigest()
+ '.' + '/'
+ get_extension(href)) + get_filename(part[attribute]))
path = os.path.join(directory, name) path = os.path.join(directory, name)
os.makedirs(os.path.dirname(path), exist_ok=True)
if not os.path.exists(path): if not os.path.exists(path):
try: try:
content = request(href).read() content = request(href).read()