Use folders to store archives of different sites.
This commit is contained in:
parent
a2bc995885
commit
42869cf410
2
.gitignore
vendored
2
.gitignore
vendored
@ -15,4 +15,4 @@ dist/
|
||||
*.egg-info/
|
||||
|
||||
# Directories for testing
|
||||
test_ma
|
||||
archive
|
23
macli.py
23
macli.py
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
import os
|
||||
import argparse
|
||||
@ -9,6 +10,9 @@ import urllib.parse
|
||||
from bs4 import BeautifulSoup as bs4
|
||||
|
||||
# User-Agent string identifying this tool.
USER_AGENT = 'miniarchiver bot'

# Directory under which every named archive folder is created.
ARCHIVE_ROOT = 'archive'

# Archive name used when none is given on the command line.  It must satisfy
# ALLOWED_NAMES_RE, so it uses a dash: underscores are not in the allowed set.
DEFAULT_NAME = 'archived-web'

# Valid archive names: letters, digits, spaces, dots, commas and dashes.
#  * The negative lookahead rejects '.' and '..', which would otherwise make
#    the joined path point at ARCHIVE_ROOT itself or at its parent.
#  * '\Z' is used instead of '$' because '$' also matches just before a
#    trailing newline, which would let 'name\n' through.
ALLOWED_NAMES_RE = re.compile(r'^(?!\.{1,2}\Z)[- .,a-zA-Z0-9]+\Z')
|
||||
|
||||
|
||||
OBJECT_TYPE_DESCRIPTORS = (
|
||||
@ -48,6 +52,8 @@ OBJECT_TYPE_DESCRIPTORS = (
|
||||
def get_parser():
    """Build the command-line argument parser.

    Arguments defined:
      url           positional, the URL to archive.
      --name / -n   name of the archive folder; defaults to DEFAULT_NAME.
      --force / -f  flag; when set, an existing archive may be overwritten.

    Returns:
        argparse.ArgumentParser: the configured parser (nothing is parsed
        here; the caller invokes ``parse_args``).
    """
    parser = argparse.ArgumentParser(
        description='Archive a web page into a named folder.')
    parser.add_argument('url', help='URL of the page to archive')
    parser.add_argument(
        '--name', '-n', default=DEFAULT_NAME,
        help='name of the archive folder (default: %(default)s)')
    parser.add_argument(
        '--force', '-f', action='store_true',
        help='overwrite the archive if it already exists')
    return parser
|
||||
|
||||
|
||||
@ -140,8 +146,21 @@ def archive_to_dir(directory, url):
|
||||
|
||||
def main():
    """Command-line entry point: archive the given URL into a named folder.

    Parses the command line, validates the archive name, and stores the
    archive in ``ARCHIVE_ROOT/<name>`` via ``archive_to_dir``.

    On an invalid name, or when the target folder already exists and
    ``--force`` was not given, a message is printed and the function returns
    without archiving anything.
    """
    args = get_parser().parse_args()

    # Validate the name *before* it is used to build a filesystem path.
    # fullmatch() requires the whole string to consist of allowed characters,
    # so a trailing newline cannot slip through.
    if not ALLOWED_NAMES_RE.fullmatch(args.name):
        print(("Only characters 'a-zA-Z0-9', spaces, dots, commas and dashes"
               " are allowed as names."))
        return

    # '.' and '..' consist of allowed characters but would resolve to
    # ARCHIVE_ROOT itself or to its parent directory.
    if args.name in ('.', '..'):
        print("The names '.' and '..' are not allowed.")
        return

    path = os.path.join(ARCHIVE_ROOT, args.name)

    if os.path.exists(path) and not args.force:
        print(("Archive “{}” already exists, set a new name with '-n <name>'"
               " or force an overwrite with '-f'.")
              .format(args.name))
        return

    os.makedirs(path, exist_ok=True)
    archive_to_dir(path, args.url)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
Reference in New Issue
Block a user