From 99274cf6d78604a170a1103e6445f42435c4d18f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20Mart=C3=ADnez=20Portela?= Date: Fri, 17 Dec 2021 09:54:09 +0100 Subject: [PATCH] Add source set handling. --- macli.py | 58 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/macli.py b/macli.py index cf4ab13..5c282c5 100644 --- a/macli.py +++ b/macli.py @@ -25,6 +25,13 @@ OBJECT_TYPE_DESCRIPTORS = ( 'styles', 'href', ), + ( + { + 'name': 'source' + }, + 'sets', + 'srcset', + ), ( { 'name': 'img' @@ -94,27 +101,44 @@ def archive(content, base_url, selector, directory, attribute, progbar): if part[attribute].startswith('data:'): continue - href = urllib.parse.urljoin(base_url, part[attribute], - allow_fragments=False) - progbar.next_iter(href) + selectors = [] + if attribute == "srcset": + # Dirty hack to support srcset syntax + hrefs = [] + for option in part[attribute].split(','): + val, selector = option.split() + hrefs.append(urllib.parse.urljoin(base_url, val, + allow_fragments=False)) + selectors.append(selector) + else: + hrefs = [urllib.parse.urljoin(base_url, part[attribute], + allow_fragments=False)] - name = (hashlib.sha1(href.encode()).hexdigest() - + '/' - + get_filename(part[attribute])) + resolveds = [] + for i, href in enumerate(hrefs): + progbar.next_iter(href) - path = os.path.join(directory, name) - os.makedirs(os.path.dirname(path), exist_ok=True) - if not os.path.exists(path): - try: - content = request(href).read() - except Exception as e: - show_error(e, href) - continue + name = (hashlib.sha1(href.encode()).hexdigest() + + '/' + + get_filename(part[attribute])) - with open(path, 'wb') as f: - f.write(content) + path = os.path.join(directory, name) + os.makedirs(os.path.dirname(path), exist_ok=True) + if not os.path.exists(path): + try: + content = request(href).read() + except Exception as e: + show_error(e, href) + continue - part[attribute] = path + with open(path, 'wb') as f: + f.write(content) + + resolveds.append(path) + if i < len(selectors): + resolveds[-1] += ' ' + selectors[i] + + part[attribute] = ', '.join(resolveds) def relink_links(content, base_url):