Add source set handling.

This commit is contained in:
Sergio Martínez Portela 2021-12-17 09:54:09 +01:00
parent 1c609454ed
commit 99274cf6d7

View File

@ -25,6 +25,13 @@ OBJECT_TYPE_DESCRIPTORS = (
'styles', 'styles',
'href', 'href',
), ),
(
{
'name': 'source'
},
'sets',
'srcset',
),
( (
{ {
'name': 'img' 'name': 'img'
@ -94,27 +101,44 @@ def archive(content, base_url, selector, directory, attribute, progbar):
if part[attribute].startswith('data:'): if part[attribute].startswith('data:'):
continue continue
href = urllib.parse.urljoin(base_url, part[attribute], selectors = []
allow_fragments=False) if attribute == "srcset":
progbar.next_iter(href) # Dirty hack to support srcset syntax
hrefs = []
for option in part[attribute].split(','):
val, selector = option.split()
hrefs.append(urllib.parse.urljoin(base_url, val,
allow_fragments=False))
selectors.append(selector)
else:
hrefs = [urllib.parse.urljoin(base_url, part[attribute],
allow_fragments=False)]
name = (hashlib.sha1(href.encode()).hexdigest() resolveds = []
+ '/' for i, href in enumerate(hrefs):
+ get_filename(part[attribute])) progbar.next_iter(href)
path = os.path.join(directory, name) name = (hashlib.sha1(href.encode()).hexdigest()
os.makedirs(os.path.dirname(path), exist_ok=True) + '/'
if not os.path.exists(path): + get_filename(part[attribute]))
try:
content = request(href).read()
except Exception as e:
show_error(e, href)
continue
with open(path, 'wb') as f: path = os.path.join(directory, name)
f.write(content) os.makedirs(os.path.dirname(path), exist_ok=True)
if not os.path.exists(path):
try:
content = request(href).read()
except Exception as e:
show_error(e, href)
continue
part[attribute] = path with open(path, 'wb') as f:
f.write(content)
resolveds.append(path)
if i < len(selectors):
resolveds[-1] += ' ' + selectors[i]
part[attribute] = ', '.join(resolveds)
def relink_links(content, base_url): def relink_links(content, base_url):