Keep archived file names consistent.
This commit is contained in:
parent
2e83846028
commit
db86d2686a
15
macli.py
15
macli.py
@ -58,12 +58,15 @@ def get_parser():
|
||||
return parser
|
||||
|
||||
|
||||
def get_filename(path):
    """Return the final path component of a URL or file path.

    Everything from the first ``?`` onward (the query string) is dropped.
    The text after the last forward slash is then kept, and from that the
    text after the last backslash, so both URL-style and Windows-style
    separators are handled.

    Args:
        path: URL or file-system path as a string.

    Returns:
        The trailing component as a string. This is an empty string when
        the path ends with a separator or is itself empty.
    """
    # Strip the query string first so a '/' inside it cannot be mistaken
    # for a path separator.
    without_query = path.split('?')[0]
    after_slash = without_query.split('/')[-1]
    return after_slash.split('\\')[-1]
|
||||
|
||||
def get_extension(path):
    """Return the text after the last ``.`` in the file name of *path*.

    The file name is obtained with ``get_filename``. When the file name
    contains no ``.``, the whole file name is returned unchanged.
    """
    filename = get_filename(path)
    # rpartition yields ('', '', filename) when there is no '.', so the
    # last element is the whole name, matching split('.')[-1].
    return filename.rpartition('.')[2]
|
||||
|
||||
def request(url):
|
||||
req = urllib.request.Request(
|
||||
@ -84,7 +87,6 @@ def show_error(e, href=None):
|
||||
|
||||
|
||||
def archive(content, base_url, selector, directory, attribute, progbar):
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
for part in content.find_all(**selector):
|
||||
if attribute not in part.attrs:
|
||||
continue
|
||||
@ -97,10 +99,11 @@ def archive(content, base_url, selector, directory, attribute, progbar):
|
||||
progbar.next_iter(href)
|
||||
|
||||
name = (hashlib.sha1(href.encode()).hexdigest()
|
||||
+ '.'
|
||||
+ get_extension(href))
|
||||
+ '/'
|
||||
+ get_filename(part[attribute]))
|
||||
|
||||
path = os.path.join(directory, name)
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
if not os.path.exists(path):
|
||||
try:
|
||||
content = request(href).read()
|
||||
|
Loading…
Reference in New Issue
Block a user