Keep archived file names consistent.
This commit is contained in:
parent
2e83846028
commit
db86d2686a
15
macli.py
15
macli.py
@@ -58,12 +58,15 @@ def get_parser():
|
|||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def get_extension(path):
|
def get_filename(path):
|
||||||
return (path
|
return (path
|
||||||
|
.split('?')[0]
|
||||||
.split('/')[-1]
|
.split('/')[-1]
|
||||||
.split('\\')[-1]
|
.split('\\')[-1])
|
||||||
.split('.')[-1])
|
|
||||||
|
|
||||||
|
def get_extension(path):
|
||||||
|
return (get_filename(path)
|
||||||
|
.split('.')[-1])
|
||||||
|
|
||||||
def request(url):
|
def request(url):
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
@@ -84,7 +87,6 @@ def show_error(e, href=None):
|
|||||||
|
|
||||||
|
|
||||||
def archive(content, base_url, selector, directory, attribute, progbar):
|
def archive(content, base_url, selector, directory, attribute, progbar):
|
||||||
os.makedirs(directory, exist_ok=True)
|
|
||||||
for part in content.find_all(**selector):
|
for part in content.find_all(**selector):
|
||||||
if attribute not in part.attrs:
|
if attribute not in part.attrs:
|
||||||
continue
|
continue
|
||||||
@@ -97,10 +99,11 @@ def archive(content, base_url, selector, directory, attribute, progbar):
|
|||||||
progbar.next_iter(href)
|
progbar.next_iter(href)
|
||||||
|
|
||||||
name = (hashlib.sha1(href.encode()).hexdigest()
|
name = (hashlib.sha1(href.encode()).hexdigest()
|
||||||
+ '.'
|
+ '/'
|
||||||
+ get_extension(href))
|
+ get_filename(part[attribute]))
|
||||||
|
|
||||||
path = os.path.join(directory, name)
|
path = os.path.join(directory, name)
|
||||||
|
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
try:
|
try:
|
||||||
content = request(href).read()
|
content = request(href).read()
|
||||||
|
Loading…
Reference in New Issue
Block a user