Keep archived file names consistent.

This commit is contained in:
Sergio Martínez Portela 2021-12-17 09:13:09 +01:00
parent 2e83846028
commit db86d2686a

View File

@@ -58,12 +58,15 @@ def get_parser():
return parser
def get_extension(path):
def get_filename(path):
return (path
.split('?')[0]
.split('/')[-1]
.split('\\')[-1]
.split('.')[-1])
.split('\\')[-1])
def get_extension(path):
return (get_filename(path)
.split('.')[-1])
def request(url):
req = urllib.request.Request(
@@ -84,7 +87,6 @@ def show_error(e, href=None):
def archive(content, base_url, selector, directory, attribute, progbar):
os.makedirs(directory, exist_ok=True)
for part in content.find_all(**selector):
if attribute not in part.attrs:
continue
@@ -97,10 +99,11 @@ def archive(content, base_url, selector, directory, attribute, progbar):
progbar.next_iter(href)
name = (hashlib.sha1(href.encode()).hexdigest()
+ '.'
+ get_extension(href))
+ '/'
+ get_filename(part[attribute]))
path = os.path.join(directory, name)
os.makedirs(os.path.dirname(path), exist_ok=True)
if not os.path.exists(path):
try:
content = request(href).read()