Module anchorage.anchor_tools.online
Online archival methods
Expand source code
# SPDX-FileCopyrightText: © 2021 Antonio López Rivera <antonlopezr99@gmail.com>
# SPDX-License-Identifier: GPL-3.0-only
"""
Online archival methods
-----------------------
"""
from wayback import WaybackClient
from archivenow import archivenow
from alexandria.shell import suppress_stdout
from anchorage.anchor_utils.aesthetic import str_log_info, str_log_error, str_log_success
class UploadException(Exception):
def __init__(self, message):
super().__init__(message)
def add(url, archive='ia', api_key=None, overwrite=False):
"""
Archive a website in one of the four archives supported by Archive Now (archivenow).
TODO: Recognize internetarchive upload error messages as failures.
:param url: URL of website to be archived.
:param archive: List or string specifying archives to which to save the website.
Available archives:
- 'all': All archives
- 'ia': Internet Archive (default)
- 'is': Archive.is
- 'mg': Megalodon.jp
- 'cc': Perma.cc
:param api_key: Perma.cc API key. Format:
{"cc_api_key":"$YOUR-Perma-cc-API-KEY"}
:param overwrite: Archive URL even if it's already present in the Internet Archive.
"""
def upload(url):
if archive == 'cc':
with suppress_stdout():
archive_url = archivenow.push(url, archive, api_key)[0]
else:
with suppress_stdout():
archive_url = archivenow.push(url, archive)[0]
if "Error (The Internet Archive)" in archive_url:
print(str_log_error(url + " ->/ ->/ ->/ " + archive_url))
raise UploadException(archive_url)
else:
log = str_log_success(url + " -> -> -> " + archive_url)
return log
try:
archive_latest = next(WaybackClient().search(url)) # Search for URL using the WaybackMachine API
if overwrite:
log = upload(url)
print(log)
else:
log = str_log_info("SKIPPED", url + " => => => " + archive_latest[7])
print(log)
except StopIteration:
# If bookmark search yields "0" error (defined by Python _wayback_)
log = upload(url)
print(log)
except UploadException as e:
return e
Functions
def add(url, archive='ia', api_key=None, overwrite=False)
-
Archive a website in one of the four archives supported by Archive Now (archivenow).
TODO: Recognize internetarchive upload error messages as failures.
:param url: URL of website to be archived. :param archive: List or string specifying archives to which to save the website. Available archives: - 'all': All archives - 'ia': Internet Archive (default) - 'is': Archive.is - 'mg': Megalodon.jp - 'cc': Perma.cc :param api_key: Perma.cc API key. Format: {"cc_api_key":"$YOUR-Perma-cc-API-KEY"} :param overwrite: Archive URL even if it's already present in the Internet Archive.
Expand source code
def add(url, archive='ia', api_key=None, overwrite=False): """ Archive a website in one of the four archives supported by Archive Now (archivenow). TODO: Recognize internetarchive upload error messages as failures. :param url: URL of website to be archived. :param archive: List or string specifying archives to which to save the website. Available archives: - 'all': All archives - 'ia': Internet Archive (default) - 'is': Archive.is - 'mg': Megalodon.jp - 'cc': Perma.cc :param api_key: Perma.cc API key. Format: {"cc_api_key":"$YOUR-Perma-cc-API-KEY"} :param overwrite: Archive URL even if it's already present in the Internet Archive. """ def upload(url): if archive == 'cc': with suppress_stdout(): archive_url = archivenow.push(url, archive, api_key)[0] else: with suppress_stdout(): archive_url = archivenow.push(url, archive)[0] if "Error (The Internet Archive)" in archive_url: print(str_log_error(url + " ->/ ->/ ->/ " + archive_url)) raise UploadException(archive_url) else: log = str_log_success(url + " -> -> -> " + archive_url) return log try: archive_latest = next(WaybackClient().search(url)) # Search for URL using the WaybackMachine API if overwrite: log = upload(url) print(log) else: log = str_log_info("SKIPPED", url + " => => => " + archive_latest[7]) print(log) except StopIteration: # If bookmark search yields "0" error (defined by Python _wayback_) log = upload(url) print(log) except UploadException as e: return e
Classes
class UploadException (message)
-
Common base class for all non-exit exceptions.
Expand source code
class UploadException(Exception): def __init__(self, message): super().__init__(message)
Ancestors
- builtins.Exception
- builtins.BaseException