Use folders to store archives of different sites.
This commit is contained in:
parent
a2bc995885
commit
42869cf410
2
.gitignore
vendored
2
.gitignore
vendored
@ -15,4 +15,4 @@ dist/
|
|||||||
*.egg-info/
|
*.egg-info/
|
||||||
|
|
||||||
# Directories for testing
|
# Directories for testing
|
||||||
test_ma
|
archive
|
23
macli.py
23
macli.py
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
@ -9,6 +10,9 @@ import urllib.parse
|
|||||||
from bs4 import BeautifulSoup as bs4
|
from bs4 import BeautifulSoup as bs4
|
||||||
|
|
||||||
USER_AGENT = 'miniarchiver bot'
|
USER_AGENT = 'miniarchiver bot'
|
||||||
|
ARCHIVE_ROOT = 'archive'
|
||||||
|
DEFAULT_NAME = 'archived_web'
|
||||||
|
ALLOWED_NAMES_RE = re.compile(r'^[- .,a-zA-Z0-9]+$')
|
||||||
|
|
||||||
|
|
||||||
OBJECT_TYPE_DESCRIPTORS = (
|
OBJECT_TYPE_DESCRIPTORS = (
|
||||||
@ -48,6 +52,8 @@ OBJECT_TYPE_DESCRIPTORS = (
|
|||||||
def get_parser():
|
def get_parser():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('url')
|
parser.add_argument('url')
|
||||||
|
parser.add_argument('--name', '-n', default=DEFAULT_NAME)
|
||||||
|
parser.add_argument('--force', '-f', action='store_true')
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@ -140,8 +146,21 @@ def archive_to_dir(directory, url):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = get_parser().parse_args()
|
args = get_parser().parse_args()
|
||||||
os.makedirs('test_ma', exist_ok=True)
|
|
||||||
archive_to_dir('test_ma', args.url)
|
path = os.path.join(ARCHIVE_ROOT, args.name)
|
||||||
|
if not ALLOWED_NAMES_RE.match(args.name):
|
||||||
|
print(("Only characters 'a-zA-Z0-9', spaces, dots, commas and dashes"
|
||||||
|
" are allowed as names."))
|
||||||
|
return
|
||||||
|
|
||||||
|
if os.path.exists(path) and not args.force:
|
||||||
|
print(("Archive “{}” already exists, set a new name with '-n <name>'"
|
||||||
|
" or force an overwrite with '-f'")
|
||||||
|
.format(args.name))
|
||||||
|
return
|
||||||
|
|
||||||
|
os.makedirs(path, exist_ok=True)
|
||||||
|
archive_to_dir(path, args.url)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
Reference in New Issue
Block a user