Module anchorage.bookmarks
Anchorage bookmark processing
# SPDX-FileCopyrightText: © 2021 Antonio López Rivera <>
# SPDX-License-Identifier: GPL-3.0-only
Anchorage bookmark processing
import os
import re
import sys
import shutil
import datetime
import rapidjson
from tqdm import tqdm
from alexandria.paths import root
from import print_color
from anchorage.anchor_infrs.infrastructure import init, read_config
from anchorage.anchor_utils.aesthetic import colors, str_log_progress
from anchorage.anchor_utils.system import operating_system
from import suppress_stdout
from anchorage.anchor_utils.regex import expr_check
from anchorage.anchor_utils.file_conversion import JSONLZ4_to_JSON
# Return the Anchorage configuration as produced by ``read_config()``.
def loc(overwrite=False):
# NOTE(review): the statement guarded by ``if overwrite:`` is not visible in this
# listing - presumably a call to the imported ``init`` that regenerates the
# configuration; confirm against the package source.
# Read literally, the configuration is only returned when ``overwrite`` is truthy,
# yet ``path()`` subscripts the result of ``loc(overwrite)`` with the default
# ``overwrite=False``, so the ``return`` below was most likely unconditional.
if overwrite:
return read_config()
# Resolve the bookmark file path for ``browser`` from the configuration returned by
# ``loc(overwrite)``, indexed as ``[browser][operating_system()]``.
def path(browser, overwrite=False):
:return: Path of browser of choice Bookmarks file
# Firefox: the configured entry is a directory. Each file name is expected to carry
# a ``YYYY-MM-DD`` date at characters 10-19; the file with the latest date wins.
# ``strptime`` raises ValueError for any file name that does not match.
if browser == "firefox":
o_sys = operating_system()
dir_path = loc(overwrite)[browser][o_sys]
# Sentinel older than any real backup, so the first dated file always replaces it.
most_recent = datetime.datetime(1970, 1, 1)
for filename in os.listdir(dir_path):
file_date = datetime.datetime.strptime(filename[10:20], "%Y-%m-%d")
if file_date > most_recent:
most_recent = file_date
bk_path = dir_path + "/" + filename
# NOTE(review): ``bk_path`` is never assigned when ``dir_path`` is empty, so the
# final ``return`` would fail in that case.
# NOTE(review): an ``else:`` appears to be missing from this listing just before
# the next two lines; without it they would overwrite the Firefox result.
o_sys = operating_system()
bk_path = loc(overwrite)[browser][o_sys]
# Windows: the configured path is prefixed with the parent directory of %APPDATA%
# and forward slashes are converted to backslashes.
# NOTE(review): indentation is lost, so it cannot be told from here whether this
# applies to every browser or only to the non-Firefox branch - confirm.
if o_sys == "windows":
preamble = ("\\".join(str(os.getenv("APPDATA")).split("\\")[:-1]) + "\\")
bk_path = (preamble + bk_path).replace("/", "\\")
return bk_path
def load(path):
    """
    Parse a browser bookmark file into a dictionary.

    Files ending in ``.jsonlz4`` are converted with ``JSONLZ4_to_JSON`` and
    re-shaped so that the result exposes a top-level ``'roots'`` mapping of
    folder name to folder dictionary. Every other file, including files
    without an extension (the case for Chromium browsers), is parsed as
    plain JSON.

    :param path: Path to Bookmark JSON or JSONLZ4 file
    :return: Parsed JSON file
    """
    # Only the final path component decides the format, so a dot or the text
    # "json" in a parent directory name cannot influence the result. Both
    # separators are handled regardless of the platform running the code.
    filename = path.replace("\\", "/").rsplit("/", 1)[-1]
    extension = os.path.splitext(filename)[1].lower()

    if extension == ".jsonlz4":
        tree = rapidjson.loads(JSONLZ4_to_JSON(path))
        folders = tree['children'][0]['children']
        return {'roots': {folder['name']: folder for folder in folders}}

    # Context manager so the file handle is closed as soon as parsing ends.
    with open(path, encoding="utf8") as handle:
        return rapidjson.load(handle)
def export(path, dest=None):
    """
    Copy a bookmark file into ``dest`` under a dated file name.

    The copy is named ``bookmarks_<YYYY_MM_DD>`` followed by ``.jsonlz4``
    when ``path`` contains ``.jsonlz4`` and by ``.json`` otherwise.

    :param path: Bookmark JSON path
    :param dest: Destination DIRECTORY; the dated file name is appended to it.
                 Defaults to the directory returned by ``root()``.
    :return: ``dest``, the directory the bookmark file was copied into
    """
    if dest is None:
        # Resolved at call time rather than at import time, so the default
        # always reflects the environment the function actually runs in.
        dest = root()

    date = datetime.datetime.now().strftime('%Y_%m_%d')
    extension = ".jsonlz4" if ".jsonlz4" in path else ".json"
    filename = f"bookmarks_{date}{extension}"

    shutil.copyfile(path, os.path.join(dest, filename))
    return dest
class bookmarks:
def __init__(self, bookmark_dict,
             drop_duplicate_urls=True,
             drop_local_files=True,
             drop_dirs=None,
             drop_names=None,
             drop_urls=None,
             drop_dirs_subs=None,
             drop_urls_subs=None,
             drop_names_subs=None,
             drop_dirs_regex=None,
             drop_urls_regex=None,
             drop_names_regex=None):
    """
    Prepare bookmarks for archiving.

    Instance attributes:

        self.bookmarks: dictionary of - Name: Dict - pairs, where
                            Dict: {"url": url,
                                   "tags": [tag_1, tag_2, ..., tag_n]}
        self.tags:      working list with the ordered names of the parent
                        folders of the entry currently being visited.
        self.n_dirs:    number of directories counted during the search.

    :param bookmark_dict: Bookmark JSON file (must provide a ``'roots'`` entry).
    :param drop_local_files: Remove local file bookmarks from bookmark list.
    :param drop_duplicate_urls: Remove duplicated URLs.
    :param drop_dirs: Directories from which no bookmarks are to be archived.
    :param drop_names: Bookmark names to be removed (exact match).
    :param drop_urls: Bookmark URLs to be removed (exact match).
    :param drop_dirs_subs: Substrings; forwarded to ``drop_substring`` for directories.
    :param drop_urls_subs: Substrings; forwarded to ``drop_substring`` for URLs.
    :param drop_names_subs: Substrings; forwarded to ``drop_substring`` for names.
    :param drop_dirs_regex: Regex; forwarded to ``drop_regex`` for directories.
    :param drop_urls_regex: Regex; forwarded to ``drop_regex`` for URLs.
    :param drop_names_regex: Regex; forwarded to ``drop_regex`` for names.
    """
    content = bookmark_dict['roots']

    self.bookmarks = {}
    self.tags = []
    self.n_dirs = 0

    # Conduct iteration
    for directory in content:
        self.search_dict(content[directory])    # Iteration
        self.tags = []                          # "Navigate" back to root
        # NOTE(review): ``search_dict`` also counts every dictionary that has
        # "children", so such roots are counted twice - confirm this is intended.
        self.n_dirs += 1                        # Keep track of the number of directories

    # Drop duplicate URLs
    if drop_duplicate_urls:
        self.drop_duplicate_urls()

    # Filters
    if drop_local_files:                        # Local files
        self.drop_local_files()

    if drop_dirs:                               # String - Directories
        self.drop_string(target=drop_dirs, what='dir')
    if drop_names:                              # String - Names
        self.drop_string(target=drop_names, what='name')
    if drop_urls:                               # String - URLs
        # The URL filter must receive the URL targets, not the directory ones.
        self.drop_string(target=drop_urls, what='url')

    if drop_dirs_subs:                          # Substring - Directories
        self.drop_substring(target=drop_dirs_subs, what='dir')
    if drop_names_subs:                         # Substring - Names
        self.drop_substring(target=drop_names_subs, what='name')
    if drop_urls_subs:                          # Substring - URLs
        self.drop_substring(target=drop_urls_subs, what='url')

    if drop_dirs_regex:                         # Regex - Directories
        self.drop_regex(regex=drop_dirs_regex, what='dir')
    if drop_names_regex:                        # Regex - Names
        self.drop_regex(regex=drop_names_regex, what='name')
    if drop_urls_regex:                         # Regex - URLs
        self.drop_regex(regex=drop_urls_regex, what='url')
def search_dict(self, dictionary):
    """
    Reduce depth-n bookmark dictionary to depth-1 dictionary of - Name: Dict - pairs, where
        Dict: {"url": url,
               "tags": tag_list}
    and tag_list is a list containing the ordered names of the parent folders of each link.

    Folders (dictionaries with "children") are searched recursively; links
    (dictionaries with "url") are stored in ``self.bookmarks``; anything else
    is ignored.

    :param dictionary: Bookmark dictionary from which - Name: Dict - pairs are to be extracted.
    :return: None. Results are accumulated in ``self.bookmarks``.
    """
    if "children" in dictionary:
        name = dictionary['name']
        self.tags.append(name)                          # Append directory name to tag list
        self.n_dirs += 1                                # Keep track of the number of directories
        self.search_children(dictionary['children'])    # Search through children
        self.tag_backtrack(name)                        # After search is over, "navigate" back to parent directory
    elif 'url' in dictionary:
        name = dictionary['name']
        # Count the stored keys that begin with this name. Earlier duplicates
        # were stored as "<name> ::anchorage name duplicate:: k", so a prefix
        # test finds them as well as the first, unsuffixed entry. A literal
        # prefix test is what matching an escaped pattern at the start amounts to.
        n_rep = sum(1 for existing in self.bookmarks if existing.startswith(name))
        if n_rep > 0:
            key = name + f" ::anchorage name duplicate:: {n_rep+1}"
        else:
            key = name
        self.bookmarks[key] = {'url': dictionary['url'],
                               'tags': self.tags.copy()}
    # Neither "children" nor "url": nothing to record (e.g. possibly empty bookmark folders).
def search_children(self, children):
    """
    Iterate over children.

    :param children: List entries in bookmark dictionary, which are by custom named "children" in
                     bookmark dictionaries. Each entry is handed to ``search_dict``.
    """
    for child in children:
        self.search_dict(child)
def tag_backtrack(self, tag):
    """
    Remove tracked tags from input tag onwards.

    For a series of nested dictionaries (D) which may contain "children" lists (c)

        D
        |_ c[dir_name1]                 tags = [dir_name1]
            |_ D
            |   |_ c[dirname_2]         tags = [dir_name1, dir_name2]
            |       |_ D
            |_ D                     !! tags = [dir_name1]

    It is necessary to be able to "navigate" to the current parent directory after
    iterating through any subdirectories.

    The "path" to each link is kept in an ordered list. To "navigate" back after
    iterating through a directory, the last occurrence of its tag and every
    element after it are removed from the tag list.

    :param tag: Name of the directory whose search has just finished.
    """
    try:
        # Search from the end so that the innermost folder wins when the same
        # name appears more than once in the current path.
        inv_index = self.tags[::-1].index(tag)
    except ValueError:
        # Tag is not being tracked at all.
        self.tags = [tag]
    else:
        index = len(self.tags) - 1 - inv_index
        self.tags = self.tags[:index]
def drop_duplicate_urls(self):
    """
    Remove every bookmark whose URL was already found in an earlier entry.

    The first bookmark with a given URL (in dictionary order) is kept.
    """
    seen = set()
    # Iterate over a snapshot: entries are deleted from the live dictionary.
    for key, value in list(self.bookmarks.items()):
        if value['url'] in seen:
            del self.bookmarks[key]
        else:
            seen.add(value['url'])
def drop_local_files(self):
    """
    Remove every bookmark whose URL does not start with "http".
    """
    # Iterate over a snapshot: entries are deleted from the live dictionary.
    for key, value in list(self.bookmarks.items()):
        if not value["url"].startswith("http"):
            del self.bookmarks[key]
def drop_string(self, target, what):
    """
    Remove every bookmark for which an exact match with ``target`` is found.

    :param target: String or list of strings to be matched exactly.
    :param what: What is compared against ``target``:
                 - 'dir' : each of the bookmark's tags (parent folder names).
                 - 'name': the bookmark name (its key in ``self.bookmarks``).
                 - 'url' : the bookmark URL.
                 Any other value is reported with ``print_color`` and nothing
                 is removed.
    """
    if isinstance(target, str):
        target = [target]

    # What
    if what not in ('dir', 'name', 'url'):
        print_color(f'Wrong filter target: {what}', 'red')
        return

    unwanted = set(target)
    # Iterate over a snapshot: entries are deleted from the live dictionary.
    for key, value in list(self.bookmarks.items()):
        if what == 'name':
            candidates = [key]
        elif what == 'url':
            # The URL is compared as a whole string, not character by character.
            candidates = [value['url']]
        else:
            candidates = value['tags']
        if unwanted.intersection(candidates):
            del self.bookmarks[key]
def drop_substring(self, target, what):
    """
    Remove every bookmark in which any of the ``target`` substrings is found.

    :param target: Substring or list of substrings to look for.
    :param what: Where the substrings are looked for:
                 - 'dir' : each of the bookmark's tags (parent folder names).
                 - 'name': the bookmark name (its key in ``self.bookmarks``).
                 - 'url' : the bookmark URL.
                 Any other value is reported with ``print_color`` and nothing
                 is removed.
    """
    if isinstance(target, str):
        target = [target]

    # What
    if what not in ('dir', 'name', 'url'):
        print_color(f'Wrong filter target: {what}', 'red')
        return

    # Iterate over a snapshot: entries are deleted from the live dictionary.
    for key, value in list(self.bookmarks.items()):
        # Names and URLs are single strings and are searched as a whole;
        # iterating over them directly would test one character at a time.
        if what == 'name':
            candidates = [key]
        elif what == 'url':
            candidates = [value['url']]
        else:
            candidates = value['tags']
        if any(substring in string for string in candidates for substring in target):
            del self.bookmarks[key]
def drop_regex(self, regex, what):
    """
    Remove every bookmark for which ``regex`` matches at the start of the
    inspected string (``re.match`` semantics).

    :param regex: Regular expression pattern.
    :param what: What the pattern is matched against:
                 - 'dir' : each of the bookmark's tags (parent folder names).
                 - 'name': the bookmark name (its key in ``self.bookmarks``).
                 - 'url' : the bookmark URL.
                 Any other value is reported with ``print_color`` and nothing
                 is removed.
    :raises re.error: If ``regex`` is not a valid pattern.
    """
    # Check regex formula for correctness
    # NOTE(review): only ``re.compile`` validates the pattern here; the module
    # imports ``expr_check``, which may have been meant for this step - confirm.
    r = re.compile(regex)

    # What
    if what not in ('dir', 'name', 'url'):
        print_color(f'Wrong filter target: {what}', 'red')
        return

    # Iterate over a snapshot: entries are deleted from the live dictionary.
    for key, value in list(self.bookmarks.items()):
        # Names and URLs are single strings and are matched as a whole;
        # iterating over them directly would test one character at a time.
        if what == 'name':
            candidates = [key]
        elif what == 'url':
            candidates = [value['url']]
        else:
            candidates = value['tags']
        if any(r.match(string) for string in candidates):
            del self.bookmarks[key]
# Run ``f(key, value)`` on every entry of ``self.bookmarks`` and collect the entries
# for which the call raised.
# NOTE(review): the rest of the signature is missing from this listing; the body
# below uses ``loglevel``, ``pb_label`` (documented), ``pb_leave`` (documented) and
# ``pb_width``.
def loop(self, f,
:param f: Function - To be run on each entry of the bookmark dictionary.
:param loglevel: Python standard library logging-style log levels.
- 0 : Full log output.
- 20: Visualize progress with tqdm progress bar.
- 50: Suppress all output.
:param pb_label: Str - Progress bar label.
:param pb_leave: Boolean - False to remove progress bar from screen after completion.
:param pb_width: N - Width in char of the progress bar.
:return: List with all [key, value] pairs for which `f` execution resulted in an error.
# NOTE(review): the value returned is the dictionary ``e`` built below (bookmark
# name -> {'url', 'Error message'}), not a list as stated above.
e = {}
# NOTE(review): right-hand side missing in this listing; ``t_elapsed.total_seconds()``
# below implies a datetime, presumably the current time - confirm.
t0 =
step = 1
total = len(self.bookmarks)
if loglevel == 20: # Create tqdm progress bar if specified
# NOTE(review): the keyword arguments of this call and part of the format string are
# missing in this listing; four colour values are interpolated but only one ``%s``
# placeholder is visible.
pgr = tqdm(self.bookmarks.items(),
"%s| {n_fmt}/{total_fmt} "
" {rate_fmt}{postfix}]" % (colors.fgYellow, colors.reset,
colors.fgRed, colors.reset)
for key, value in self.bookmarks.items():
if loglevel == 0:
# Time estimate
# NOTE(review): right-hand side missing, see ``t0`` above.
tn =
t_elapsed = tn - t0
# NOTE(review): this is (steps per second) * (entries left), which is not a duration;
# an estimate of the remaining time would be (seconds per step) * (entries left) - confirm.
# The ``max(..., 10**-6)`` guards against division by zero.
t_remaining = step/max(t_elapsed.total_seconds(), 10**-6)*(total-step)
print(str_log_progress(f"{step}/{total}", t_elapsed.total_seconds(), t_remaining))
step += 1
try: # Attempt to run provided function on dictionary
if loglevel in [20, 50]:
with suppress_stdout(): # Suppress function output if so specified
f(key, value)
# NOTE(review): an ``else:`` appears to be missing before the next line; as listed,
# ``f`` would run twice for log levels 20 and 50.
f(key, value)
# BaseException also covers KeyboardInterrupt and SystemExit, so those are recorded
# as errors as well instead of stopping the loop.
except BaseException as exception: # Error: add entry to error list
e[key] = {'url': value['url'],
'Error message': str(exception)}
# NOTE(review): the bodies of the next two ``if`` statements are missing in this
# listing (presumably an error printout and a progress bar update).
if loglevel == 0:
if loglevel == 20: # Update progress bar if in use
# NOTE(review): a statement appears to be missing directly after this ``if``
# (presumably closing the progress bar).
if loglevel == 20:
print("\r" + " "*pb_width, end="\r") # Clean console from debris left by tqdm
return e
def __repr__(self):
    """
    :return: Name and URL of all bookmarks (one "name url" pair per line), as well
             as the total number of bookmarks and directories in the collection.
    """
    lines = [" ".join((name, entry["url"])) for name, entry in self.bookmarks.items()]
    info = f'\n\nFound: {len(self.bookmarks)} links and {self.n_dirs} directories.'
    return "\n".join(lines) + info
def export(path, dest='/opt/hostedtoolcache/Python/3.7.13/x64/bin')
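:param path: Bookmark JSON path
:param dest: Destination directory; the dated file name (bookmarks_YYYY_MM_DD plus extension) is appended to it
:return: The destination directory. The bookmark JSON file is exported into it (by default the project's root directory)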
def load(path)
:param path: Path to Bookmark JSON or JSONLZ4 file :return: Parsed JSON file
def loc(overwrite=False)
def path(browser, overwrite=False)
:return: Path of browser of choice Bookmarks file
class bookmarks (bookmark_dict, drop_duplicate_urls=True, drop_local_files=True, drop_dirs=None, drop_names=None, drop_urls=None, drop_dirs_subs=None, drop_urls_subs=None, drop_names_subs=None, drop_dirs_regex=None, drop_urls_regex=None, drop_names_regex=None)
Prepare bookmarks for archiving.
Instance attributes:
self.bookmarks: dictionary of - Name: Dict - pairs, where Dict: {"url": url, "tags": [tag_1, tag_2, ..., tag_n]}
self.tags: list containing the ordered names of the parent folders of each link.
:param bookmark_dict: Bookmark JSON file. :param drop_local_files: Remove local file bookmarks from bookmark list. :param drop_duplicate_urls: Remove duplicated URLs. :param drop_dirs: Directories from which no bookmarks are to be archived.
def drop_duplicate_urls(self)
def drop_local_files(self)
def drop_regex(self, regex, what)
def drop_string(self, target, what)
def drop_substring(self, target, what)
def loop(self, f, loglevel=0, pb_label=None, pb_leave=True, pb_width=110)
:param f: Function - To be run on each entry of the bookmark dictionary.
:param loglevel: Python standard library logging-style log levels.
  - 0 : Full log output.
  - 20: Visualize progress with tqdm progress bar.
  - 50: Suppress all output.
:param pb_label: Str - Progress bar label.
:param pb_leave: Boolean - False to remove progress bar from screen after completion.
:param pb_width: N - Width in char of the progress bar.
:return: Dictionary with an entry for every bookmark for which `f` execution resulted in an error, keyed by bookmark name and holding its URL and the error message.
def search_children(self, children)
Iterate over children.
:param children: List entries in bookmark dictionary, which are by custom named "children" in bookmark dictionaries.
def search_dict(self, dictionary)
Reduce depth-n bookmark dictionary to depth-1 dictionary of -Name: Dict - pairs, where
Dict: {"url": url, "tags": tag_list}
and tag_list is a list containing the ordered names of the parent folders of each link.
:param dictionary: Bookmark dictionary from which - Name: [link, tags] - pairs are to be extracted. :return: Depth-1 dictionary of - Name: [link, tags] - pairs.
def tag_backtrack(self, tag)
Remove tracked tags deeper than input tag. For a series of nested dictionaries (D) which may contain "children" lists (c)
    D
    |_ c[dir_name1]                 tags = [dir_name1]
        |_ D
        |   |_ c[dirname_2]         tags = [dir_name1, dir_name2]
        |       |_ D
        |_ D                     !! tags = [dir_name1]

It is necessary to be able to "navigate" to the current parent directory after iterating through any subdirectories.
The "path" to each link is kept in an ordered list. To "navigate" back after iterating through any previous subdirectories, all elements in the tag list after the specified (parent) tag are eliminated.
