From 42869cf410fb184093c8fa863dd54e2f8fc776d9 Mon Sep 17 00:00:00 2001
From: kenkeiras
Date: Wed, 5 Jul 2017 00:55:33 +0200
Subject: [PATCH] Use folders to store archives of different sites.

Store every archive in its own folder under `archive/`. The folder is
chosen with --name/-n (default: archived_web). Names are checked against
a whitelist before being used as a path component, and an archive that
already exists is only written into when --force/-f is given.
---
Note: revised during review (message fixes, dot-only names rejected,
comments added). The blob ids on the `index` lines are those of the
original patch.

 .gitignore |  2 +-
 macli.py   | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6476120..1a089a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,4 @@ dist/
 *.egg-info/
 
 # Directories for testing
-test_ma
+archive
diff --git a/macli.py b/macli.py
index 4a487ef..5fdc398 100644
--- a/macli.py
+++ b/macli.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+import re
 import hashlib
 import os
 import argparse
@@ -9,6 +10,14 @@ import urllib.parse
 from bs4 import BeautifulSoup as bs4
 
 USER_AGENT = 'miniarchiver bot'
+# Root folder under which every named archive gets its own sub-folder.
+ARCHIVE_ROOT = 'archive'
+# Archive name used when --name is not given.
+DEFAULT_NAME = 'archived_web'
+# Whitelist for archive names. Dot-only names ('.', '..') are rejected so a
+# name can never resolve to ARCHIVE_ROOT itself or to its parent; \Z (unlike
+# $) also rejects a trailing newline.
+ALLOWED_NAMES_RE = re.compile(r'^(?!\.+\Z)[- .,a-zA-Z0-9]+\Z')
 
 
 OBJECT_TYPE_DESCRIPTORS = (
@@ -48,6 +57,10 @@ OBJECT_TYPE_DESCRIPTORS = (
 def get_parser():
     parser = argparse.ArgumentParser()
     parser.add_argument('url')
+    parser.add_argument('--name', '-n', default=DEFAULT_NAME,
+                        help='archive folder name (default: %(default)s)')
+    parser.add_argument('--force', '-f', action='store_true',
+                        help='archive into the folder even if it exists')
     return parser
 
 
@@ -140,8 +153,23 @@ def archive_to_dir(directory, url):
 
 def main():
+    """Archive the URL given on the command line into ARCHIVE_ROOT/<name>."""
     args = get_parser().parse_args()
-    os.makedirs('test_ma', exist_ok=True)
-    archive_to_dir('test_ma', args.url)
+
+    # Validate the name before using it to build a filesystem path.
+    if not ALLOWED_NAMES_RE.match(args.name):
+        print(("Only characters 'a-zA-Z0-9', spaces, dots, commas and dashes "
+               "are allowed as names, and a name cannot be only dots."))
+        return
+
+    path = os.path.join(ARCHIVE_ROOT, args.name)
+    if os.path.exists(path) and not args.force:
+        print(("Archive “{}” already exists, set a new name with '-n <name>'"
+               " or force an overwrite with '-f'.")
+              .format(args.name))
+        return
+
+    os.makedirs(path, exist_ok=True)
+    archive_to_dir(path, args.url)
 
 
 if __name__ == '__main__':