-
Notifications
You must be signed in to change notification settings - Fork 0
/
synchronize.py
209 lines (176 loc) · 10.1 KB
/
synchronize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
from instapaper import Instapaper
from instapaper import Bookmark
import shutil
from typing import Dict, Tuple, AnyStr, Iterable, Optional
from pathlib import Path
from download import BookmarkDownloader
import json
import os
import sys
import time
# Passed as the `limit` argument when listing the bookmarks of each online folder.
NUM_BOOKMARKS_TO_SYNCHRONIZE = 500 # The maximum value the API allows
class BookmarkSynchronizer(object):
    """Synchronize Instapaper folders/bookmarks with a local ``books`` directory.

    Every folder is mirrored as ``books/<title with underscores>_<folder_id>``
    and every downloaded bookmark is a file inside such a directory whose stem
    ends in ``_<bookmark_id>``. The folder assignment seen during the previous
    run is persisted in ``index.json`` and serves as the common ancestor of a
    three-way merge between the online and the local state.

    All paths are relative to the current working directory.
    """

    # Directory that contains one sub-directory per folder.
    _BOOKS_DIR = Path('books')
    # Sub-directory of ``_BOOKS_DIR`` that receives folders removed online.
    # It is bookkeeping only and must never be treated as a bookmark folder.
    _DELETED_DIR_NAME = 'deleted'
    # File in which the merged tree of the last run is stored.
    _INDEX_FILE = "index.json"

    def __init__(self):
        # Both collaborators are created lazily: `instapaper` in login(),
        # `downloader` on the first download.
        self.instapaper: Optional["Instapaper"] = None
        self.downloader: Optional["BookmarkDownloader"] = None

    def login(self):
        """Create the API client from the two JSON credential files and log in.

        Reads ``oauth_config.json`` (keys ``id`` and ``secret``) and
        ``user_credentials.json`` (keys ``username`` and ``password``) from the
        current working directory.
        """
        with open("oauth_config.json", "r") as f:
            oauth_config = json.load(f)
        with open("user_credentials.json", "r") as f:
            user_credentials = json.load(f)
        self.instapaper = Instapaper(oauth_config['id'], oauth_config['secret'])
        self.instapaper.login(user_credentials['username'], user_credentials['password'])

    def online_folder_list(self) -> Iterable[Dict[str, str]]:
        """Return the user's online folders plus the fixed ``unread``/``archive`` ones.

        Each entry is a dict with the keys ``title`` and ``folder_id``; the id
        is always converted to ``str``.
        """
        folders = [
            {'title': folder['title'], 'folder_id': str(folder['folder_id'])}
            for folder in self.instapaper.folders()
        ]
        folders.append({'title': "unread", 'folder_id': "unread"})
        folders.append({'title': "archive", 'folder_id': "archive"})
        return folders

    def local_folder_list(self):
        """Return one dict per folder directory found below ``books``.

        Each entry has the keys ``title`` (str), ``folder_id`` (str, the part
        of the directory name after the last underscore) and ``folder_path``
        (the directory as a ``Path``). The ``deleted`` bookkeeping directory is
        skipped, and a missing ``books`` directory yields an empty list.
        """
        root = self._BOOKS_DIR
        if not root.is_dir():
            # First run: nothing has been created locally yet.
            return []
        folders = []
        for entry in root.iterdir():
            if not entry.is_dir() or entry.name == self._DELETED_DIR_NAME:
                continue
            title_part, _, folder_id = entry.name.rpartition("_")
            folders.append({
                # Inverse of folder_to_directory_name(): underscores back to
                # spaces, so the title is a string that round-trips to the
                # same directory name.
                'title': title_part.replace("_", " "),
                'folder_id': folder_id,
                'folder_path': entry,
            })
        return folders

    def synchronize(self):
        """Run one full pass: mirror the folder structure, then merge bookmarks."""
        online_folders = self.online_folder_list()
        self.synchronize_folders(online_folders, self.local_folder_list())
        # Re-scan after the folder pass: directories may have been created or
        # moved away, and the bookmark pass needs the paths that exist now.
        local_folders = self.local_folder_list()
        self.synchronize_bookmarks(online_folders, local_folders)

    def synchronize_folders(self, online_folders, local_folders):
        """Create directories for new online folders and retire vanished ones.

        Folders are matched by ``folder_id``. Local folders without an online
        counterpart are moved to ``books/deleted`` instead of being removed.
        """
        root = self._BOOKS_DIR
        online_ids = {folder['folder_id'] for folder in online_folders}
        local_ids = {folder['folder_id'] for folder in local_folders}
        folders_to_create = self.select_folders(online_ids - local_ids, online_folders)
        folders_to_delete = self.select_folders(local_ids - online_ids, local_folders)

        for folder in folders_to_create:
            # parents=True also creates `books` itself on the very first run.
            (root / self.folder_to_directory_name(folder)).mkdir(parents=True, exist_ok=True)

        if folders_to_delete:
            (root / self._DELETED_DIR_NAME).mkdir(parents=True, exist_ok=True)
        for folder in folders_to_delete:
            # Move to not delete in case of error. Prefer the path that was
            # actually found on disk over a name rebuilt from the title.
            source = folder.get('folder_path') or root / self.folder_to_directory_name(folder)
            shutil.move(str(source), str(root / self._DELETED_DIR_NAME / source.name))

    def select_folders(self, folder_ids, folders):
        """Return the entries of ``folders`` whose ``folder_id`` is in ``folder_ids``."""
        return [folder for folder in folders if folder['folder_id'] in folder_ids]

    def folder_to_directory_name(self, folder):
        """Build the directory name ``<title with spaces as '_'>_<folder_id>``."""
        return "_".join(folder['title'].split(" ")) + "_" + str(folder['folder_id'])

    def synchronize_bookmarks(self, online_folders: Iterable[Dict], local_folders: Iterable[Dict]):
        """Actual synchronize: Three way merge between the online version, the local version, and a stored index"""
        # Step 1: Create a tree for online and local version
        print("-- Get Trees --")
        online_tree, bookmarks = self.create_tree_from_online_version(online_folders)
        local_tree, paths = self.create_tree_from_local_version(local_folders)
        if os.path.exists(self._INDEX_FILE):
            with open(self._INDEX_FILE, "r") as f:
                # JSON object keys are always strings; restore the int ids.
                index_tree = {int(k): v for k, v in json.load(f).items()}
        else:
            # In case there is no stored index, we use an empty dictionary. That way the diffing
            # will interpret any inconsistencies as conflicts and resolve them by favoring the
            # online version.
            index_tree = dict()
        print("Discovered online bookmarks: ", len(online_tree))
        print("Discovered local bookmarks: ", len(local_tree))

        # Step 2: Three-way-diff with tree stored in index resulting in diff
        print("-- Start Diffing --")
        local_diff, online_diff = self.three_way_diff(online_tree, local_tree, index_tree)
        print("Online changes: ", len(online_diff))
        print("Local changes: ", len(local_diff))

        # Step 3: Apply diff to local and online version, conflicts are resolved by favoring online version
        print("-- Apply Diffs --")
        self.apply_diff_to_local_version(local_tree, paths, bookmarks, local_diff, local_folders)
        self.apply_diff_to_online_version(online_tree, bookmarks, online_diff)

        # Step 4: Store resulting tree for next iteration
        print("-- Storing Index --")
        resulting_tree, _ = self.create_tree_from_local_version(local_folders)
        with open(self._INDEX_FILE, "w") as f:
            json.dump(resulting_tree, f)

    def create_tree_from_online_version(self, online_folders) -> Tuple[Dict[int, str], Dict[int, "Bookmark"]]:
        """Query every online folder and map its bookmarks.

        Returns ``(tree, bookmarks)``: ``tree`` maps bookmark id to folder id
        (as ``str``), ``bookmarks`` maps bookmark id to the bookmark object.
        At most ``NUM_BOOKMARKS_TO_SYNCHRONIZE`` bookmarks are requested per
        folder.
        """
        tree: Dict[int, str] = {}
        bookmarks: Dict[int, "Bookmark"] = {}
        for folder in online_folders:
            folder_id = folder['folder_id']
            for bookmark in self.instapaper.bookmarks(folder=folder_id, limit=NUM_BOOKMARKS_TO_SYNCHRONIZE):
                tree[bookmark.bookmark_id] = str(folder_id)
                bookmarks[bookmark.bookmark_id] = bookmark
        return tree, bookmarks

    def create_tree_from_local_version(self, local_folders) -> Tuple[Dict[int, str], Dict[int, Path]]:
        """Scan the local folder directories and map the files found in them.

        Returns ``(tree, paths)``: ``tree`` maps bookmark id to folder id,
        ``paths`` maps bookmark id to the absolute path of its file. Entries
        whose stem does not end in ``_<integer>`` are not bookmarks and are
        skipped.
        """
        tree: Dict[int, str] = {}
        paths: Dict[int, Path] = {}
        for folder in local_folders:
            folder_path = folder['folder_path']
            folder_id = folder_path.name.split('_')[-1]
            for book in folder_path.iterdir():
                try:
                    book_id = int(book.stem.split('_')[-1])  # Extract bookmark id from filename
                except ValueError:
                    # Stray entry (e.g. ".DS_Store"); ignoring it keeps one
                    # unrelated file from aborting the whole run.
                    continue
                tree[book_id] = folder_id
                paths[book_id] = book.absolute()
        return tree, paths

    def three_way_diff(self, online_tree: Dict[int, str], local_tree: Dict[int, str], index_tree: Dict[int, str]):
        """Compare online and local folder assignments against the stored index.

        Only bookmark ids known online or in the index are considered. Returns
        ``(local_diff, online_diff)``; each maps a bookmark id to the folder id
        it has to end up in on that side, where ``None`` means the bookmark
        should no longer be present there.
        """
        local_diff = {}
        online_diff = {}
        for bookmark_id in set(online_tree).union(index_tree):
            online_folder = online_tree.get(bookmark_id)
            local_folder = local_tree.get(bookmark_id)
            index_folder = index_tree.get(bookmark_id)
            online_changed = online_folder != index_folder
            local_changed = local_folder != index_folder

            if not online_changed and not local_changed:
                continue
            if online_changed and not local_changed:
                # Only online changed
                local_diff[bookmark_id] = online_folder
            elif local_changed and not online_changed:
                # Only local changed
                online_diff[bookmark_id] = local_folder
            elif local_folder != online_folder:
                # Both changed and they disagree on the change
                # -> we generally take the online version, but also apply our change in case the
                #    online version was or is not visible anymore due to the API limit
                if online_folder is None:
                    online_diff[bookmark_id] = local_folder
                local_diff[bookmark_id] = online_folder
            # Remaining case: both changed and agree on the change -> nothing
            # to do. The four combinations above are exhaustive.
        return local_diff, online_diff

    def apply_diff_to_local_version(self, tree, paths: Dict[int, Path], bookmarks, local_diff: Dict, local_folders: Iterable[Dict]):
        """Bring the local files in line with ``local_diff``.

        For every entry: a falsy folder id deletes the local file; otherwise
        the file is moved into the target folder when ``tree`` (the local tree)
        already knows the bookmark, or downloaded into it when it does not.
        Exits the process when the target folder id is not in ``local_folders``.
        """
        # First match wins, mirroring a linear search through the folder list.
        folder_paths = {}
        for local_folder in local_folders:
            folder_paths.setdefault(local_folder["folder_id"], local_folder["folder_path"])

        for bookmark_id, folder_id in local_diff.items():
            if not folder_id:
                # The file does not exist online anymore (or was not delivered to us, due to the query limit)
                paths[bookmark_id].unlink()
                continue
            target = folder_paths.get(folder_id)
            if target is None:
                sys.exit(f"Folder with id {folder_id} not found.")
            if bookmark_id in tree:
                # We have the book, move it to the folder
                current = paths[bookmark_id]
                shutil.move(str(current), str(target / current.name))
            else:
                # We do not yet have the book, download and store book
                self.download_bookmark_to_folder(bookmarks[bookmark_id], target.absolute())

    def download_bookmark_to_folder(self, bookmark, folder_path: Path):
        """Download ``bookmark`` into ``folder_path``, creating the downloader on first use."""
        if not self.downloader:
            self.downloader = BookmarkDownloader(self.instapaper)
        self.downloader.download_bookmark_to_folder(bookmark, folder_path)
        # Wait for 1 second to give server a break
        time.sleep(1)

    def synthesize_bookmark(self, bookmark_id):
        """Build a ``Bookmark`` that carries nothing but the given ``bookmark_id``."""
        return Bookmark(self.instapaper, {"bookmark_id": bookmark_id})

    def apply_diff_to_online_version(self, tree, bookmarks: Dict[int, "Bookmark"], online_diff: Dict):
        """Replay local folder changes on the online account.

        ``"unread"`` and ``"archive"`` map to ``unarchive()`` / ``archive()``,
        ``None`` (a local deletion) is ignored, and any other folder id results
        in ``move(folder_id)`` provided the bookmark id is present in ``tree``.

        Raises:
            Exception: if a move is requested for a bookmark id missing from ``tree``.
        """
        for bookmark_id, folder_id in online_diff.items():
            # We synthesize a bookmark in case it is not visible in the online tree anymore
            # (deleted/too old), but we still want to operate on it
            bookmark = bookmarks[bookmark_id] if bookmark_id in bookmarks else self.synthesize_bookmark(bookmark_id)
            if folder_id == "unread":
                bookmark.unarchive()
            elif folder_id == "archive":
                bookmark.archive()
            elif folder_id is None:
                # This is a local deletion, we ignore these.
                pass
            elif bookmark_id in tree:
                bookmark.move(folder_id)
            else:
                # NOTE(review): synchronize_bookmarks passes the *online* tree
                # here while the message talks about the local tree - confirm
                # which one is intended before changing this check.
                raise Exception("Bookmark not found in local tree. Uploading bookmarks is not supported.")
if __name__ == "__main__":
    # Script entry point: authenticate first, then run one synchronization pass.
    bookmark_sync = BookmarkSynchronizer()
    bookmark_sync.login()
    bookmark_sync.synchronize()