From db86d2686afcb70acb7d007eb3976c4573c9ad78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Fri, 17 Dec 2021 09:13:09 +0100 Subject: [PATCH] Keep archived file names consistent. --- macli.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/macli.py b/macli.py index 3e2dcd0..eaf9e23 100644 --- a/macli.py +++ b/macli.py @@ -58,12 +58,15 @@ def get_parser(): return parser -def get_extension(path): +def get_filename(path): return (path + .split('?')[0] .split('/')[-1] - .split('\\')[-1] - .split('.')[-1]) + .split('\\')[-1]) +def get_extension(path): + return (get_filename(path) + .split('.')[-1]) def request(url): req = urllib.request.Request( @@ -84,7 +87,6 @@ def show_error(e, href=None): def archive(content, base_url, selector, directory, attribute, progbar): - os.makedirs(directory, exist_ok=True) for part in content.find_all(**selector): if attribute not in part.attrs: continue @@ -97,10 +99,11 @@ def archive(content, base_url, selector, directory, attribute, progbar): progbar.next_iter(href) name = (hashlib.sha1(href.encode()).hexdigest() - + '.' - + get_extension(href)) + + '/' + + get_filename(part[attribute])) path = os.path.join(directory, name) + os.makedirs(os.path.dirname(path), exist_ok=True) if not os.path.exists(path): try: content = request(href).read()