-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathflickrbackup.py
executable file
·513 lines (413 loc) · 19.7 KB
/
flickrbackup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#!/usr/bin/env python3
# requires flickrapi, threadpool
# Based on http://nathanvangheem.com/scripts/migrateflickrtopicasanokeyresize.py
import os
import os.path
import re
import shutil
import datetime
import dateutil.parser
import argparse
import urllib.request
import flickrapi
import flickrapi.exceptions
import threadpool
import threading
import sys
import logging
import tempfile
# Flickr application credentials handed to flickrapi.FlickrAPI when the
# connection is set up.
# NOTE(review): these are committed in source; consider loading them from the
# environment or a configuration file instead.
FLICKR_API_KEY = "39b564af2057a7d014875e4939a292db"
FLICKR_API_SECRET = "32cb192e3b9c43e6"
# Suffix appended (after a dot) to a photo's path to name its metadata file.
METADATA_EXTENSION = 'txt'
# File inside the destination directory holding the date of the last backup run.
STAMP_FILENAME = '.stamp'
# When true, downloads are handed to a thread pool; otherwise they run inline.
THREADED = True # Turn off for easier debugging
# Held while checking for and creating output directories.
dirlock = threading.RLock()
# Logger shared by the backup code; handlers are attached in main().
logger = logging.getLogger('flickrbackup')
class Photo(object):
    """Plain record describing one Flickr photo or video to be backed up.

    Instances are normally built with one of the alternate constructors,
    :meth:`fromInfo` (a ``photos.getInfo`` ``<photo>`` element) or
    :meth:`fromSearchResult` (a ``<photo>`` element from a listing call).
    """

    def __init__(self, id, original_secret=None, original_format=None,
                 media='photo', farm=None, server=None,
                 title=None, description=None, date_taken=None,
                 is_public=None, is_friend=None, is_family=None,
                 tags=None, url_o=None, url_l=None, flickr_usernsid=None,
                 ):
        self.id = id
        self.original_secret = original_secret
        self.original_format = original_format
        self.media = media
        self.farm = farm
        self.server = server
        self.title = title
        self.description = description
        self.date_taken = date_taken
        self.is_public = is_public
        self.is_friend = is_friend
        self.is_family = is_family
        self.tags = tags
        # Explicit download URLs, when the API response supplied them.
        self._url_o = url_o
        self._url_l = url_l
        self.flickr_usernsid = flickr_usernsid

    @property
    def url(self):
        """Return the URL to download this item from.

        Preference order: explicit original URL, explicit large URL, then a
        URL assembled from the item's ids (video or photo form).
        """
        if self._url_o:
            return self._url_o
        if self._url_l:
            return self._url_l
        if self.media == 'video':
            return "http://www.flickr.com/photos/%s/%s/play/orig/%s" % (self.flickr_usernsid, self.id, self.original_secret)
        return "http://farm%s.staticflickr.com/%s/%s_%s_o.%s" % (self.farm, self.server, self.id, self.original_secret, self.original_format)

    @staticmethod
    def _child_text(element, tag):
        """Return the text of child ``tag``, or None when the child is absent."""
        child = element.find(tag)
        return None if child is None else child.text

    @staticmethod
    def _child_attr(element, tag, attr):
        """Return attribute ``attr`` of child ``tag``, or None when absent."""
        child = element.find(tag)
        return None if child is None else child.get(attr)

    @classmethod
    def fromInfo(cls, info, flickr_usernsid=None):
        """Build an instance from a ``photos.getInfo`` style ``<photo>`` element.

        Title, description, dates, visibility and tags are read from child
        elements; a missing child yields None (or an empty tag list) instead
        of raising.
        """
        tags_node = info.find('tags')
        tags = [] if tags_node is None else [t.text for t in tags_node.findall('tag')]
        return cls(
            id=info.get('id'),
            original_secret=info.get('originalsecret'),
            original_format=info.get('originalformat') or "jpg",
            media=info.get('media') or 'photo',
            farm=info.get('farm'),
            server=info.get('server'),
            title=cls._child_text(info, 'title'),
            description=cls._child_text(info, 'description'),
            date_taken=cls._child_attr(info, 'dates', 'taken'),
            is_public=cls._child_attr(info, 'visibility', 'ispublic') == '1',
            is_friend=cls._child_attr(info, 'visibility', 'isfriend') == '1',
            is_family=cls._child_attr(info, 'visibility', 'isfamily') == '1',
            tags=tags,
            flickr_usernsid=flickr_usernsid,
        )

    @classmethod
    def fromSearchResult(cls, info, flickr_usernsid=None):
        """Build an instance from a ``<photo>`` element of a listing response.

        Most fields are attributes of the element; only the description is a
        child element. The element's ``owner`` attribute, when present, takes
        precedence over ``flickr_usernsid``.
        """
        return cls(
            id=info.get('id'),
            original_secret=info.get('originalsecret'),
            original_format=info.get('originalformat') or "jpg",
            media=info.get('media') or 'photo',
            farm=info.get('farm'),
            server=info.get('server'),
            title=info.get('title'),
            description=cls._child_text(info, 'description'),
            date_taken=info.get('datetaken'),
            is_public=info.get('ispublic') == '1',
            is_friend=info.get('isfriend') == '1',
            is_family=info.get('isfamily') == '1',
            # split() without a separator drops empty entries, so an empty or
            # missing tag string yields [] rather than [''].
            tags=(info.get('tags') or '').split(),
            flickr_usernsid=info.get('owner') or flickr_usernsid,
            url_o=info.get('url_o'),
            url_l=info.get('url_l'),
        )
class FlickrBackup(object):
    """Download photos from a Flickr account into a local directory tree.

    Files are stored under ``destination/<set title>/<year>/<month>/<day>/``
    (the set level is omitted for favorites and for photos in no set), each
    next to a small text file holding the photo's metadata.

    Constructing an instance contacts Flickr to validate or obtain an access
    token, prompting on the console if necessary.
    """

    def __init__(self, destination, store_once=False, keep_existing=False, favorites=False, retry=1, verbose=False, token_cache=None, threadpoolsize=7):
        self.destination = destination
        self.store_once = store_once
        self.keep_existing = keep_existing
        self.favorites = favorites
        self.max_retries = retry
        self.verbose = verbose
        self.threadpoolsize = threadpoolsize
        self.token_cache = token_cache

        # Initialise connection to Flickr
        self.flickr_api, self.flickr_usernsid = self.retrieve_flickr_token()

    # Operations

    def download_photo(self, photo):
        """Download a single Photo and write its metadata file.

        Unless running in favorites mode, the file goes into the directory of
        the first set the photo belongs to and, unless ``store_once`` is set,
        is copied into the directories of its other sets as well.

        Returns True; any failure propagates as an exception.
        """

        def download_callback(count, blocksize, totalsize):
            # Progress hook for urlretrieve: prints near 0%, 25%, 50%, 100%.
            # A non-positive totalsize means the size is unknown, so no
            # percentage can be computed.
            if not self.verbose or totalsize <= 0:
                return
            downloaded = float(count * blocksize)
            percent = int((downloaded / totalsize) * 100.0)
            half_block = blocksize / 2
            for fraction in (0.0, .25, .5, 1.0):
                if abs(downloaded - totalsize * fraction) <= half_block:
                    downloaded_mb = downloaded / 1024.0 / 1024.0
                    total_mb = float(totalsize) / 1024.0 / 1024.0
                    print("Photo: %s --- %i%% - %.1f/%.1fmb" % (photo.title, percent, downloaded_mb, total_mb))

        if photo.media == 'video':
            # XXX: There doesn't seem to be a way to discover original file extension (?)
            filename = photo.id + ".mov"
        else:
            filename = photo.id + "." + photo.original_format

        dirname = self.destination
        photo_sets = []
        if not self.favorites:
            # Create a photo set directory from the first set the photo is a member of
            photo_sets = self.get_photo_sets(photo)
            if photo_sets:
                dirname = self.get_set_directory(photo_sets[0])
        dirname = self.get_date_directory(dirname, photo)

        # Download
        logger.debug('Processing photo "%s" at url "%s".', photo.title, photo.url)
        filepath = os.path.join(dirname, filename)
        if self.keep_existing and os.path.exists(filepath):
            logger.debug('Image "%s" at %s already exists.', photo.title, filepath)
        else:
            tmp_fd, tmp_filename = tempfile.mkstemp()
            # urlretrieve opens the path itself, so the descriptor is not needed.
            os.close(tmp_fd)
            try:
                urllib.request.urlretrieve(photo.url, tmp_filename, download_callback)
                shutil.move(tmp_filename, filepath)
            finally:
                # Still present only if the download or the move failed.
                if os.path.exists(tmp_filename):
                    os.remove(tmp_filename)
            self.write_metadata(filepath, photo)
            logger.debug('Download of "%s" at %s to %s finished.', photo.title, photo.url, filepath)

        # Copy to additional set directories
        if not self.store_once and not self.favorites:
            for photo_set in photo_sets[1:]:
                copy_dirname = self.get_set_directory(photo_set)
                copy_dirname = self.get_date_directory(copy_dirname, photo)
                copy_filepath = os.path.join(copy_dirname, filename)
                if self.keep_existing and os.path.exists(copy_filepath):
                    logger.debug('Image "%s" at %s already exists.', photo.title, copy_filepath)
                elif filepath != copy_filepath:
                    shutil.copyfile(filepath, copy_filepath)
                    shutil.copyfile(filepath + "." + METADATA_EXTENSION, copy_filepath + "." + METADATA_EXTENSION)
                    logger.debug('Photo "%s" also copied to %s', photo.title, copy_filepath)

        # Give visual feedback in the console, but don't log
        if not self.verbose:
            print(photo.id)
        return True

    def run(self, min_date, error_file=None):
        """Run a backup of all photos updated (or favorited) since min_date.

        Returns True when every item was downloaded, possibly after retries,
        and False otherwise. Ids that still fail are appended to
        ``error_file`` when one is given.
        """
        os.makedirs(self.destination, exist_ok=True)
        items_with_errors = self._download_all(self._iter_listed_photos(min_date))
        return self._retry_failures(items_with_errors, error_file)

    def download(self, ids, error_file=None):
        """Download photos with the given ids.

        Returns True when every item was downloaded, possibly after retries,
        and False otherwise. Ids that still fail are appended to
        ``error_file`` when one is given.
        """
        os.makedirs(self.destination, exist_ok=True)
        logger.info("Processing %d photos", len(ids))

        def photos():
            for photo_id in ids:
                item = self.flickr_api.photos_getInfo(photo_id=photo_id)
                yield Photo.fromInfo(item.find('photo'), flickr_usernsid=self.flickr_usernsid)

        items_with_errors = self._download_all(photos())
        return self._retry_failures(items_with_errors, error_file)

    # Helpers

    def _iter_listed_photos(self, min_date):
        """Yield a Photo for every item of the paged listing since min_date."""
        extras = "description,url_o,media,original_format,date_upload,date_taken,tags,machine_tags"
        if self.favorites:
            # Seconds since the epoch; timestamp() is portable, unlike the
            # platform-specific strftime('%s').
            min_fave_date = str(int(dateutil.parser.parse(min_date).timestamp()))
        page = 1
        while True:
            if self.favorites:
                response = self.flickr_api.favorites_getList(
                    min_fave_date=min_fave_date,
                    extras=extras,
                    per_page=500,
                    page=page
                )
            else:
                response = self.flickr_api.photos_recentlyUpdated(
                    min_date=min_date,
                    extras=extras,
                    per_page=500,
                    page=page
                )
            listing = response.find('photos')
            if page == 1:
                logger.info("Processing %s photos", listing.get('total'))
            for item in listing.findall('photo'):
                yield Photo.fromSearchResult(item, flickr_usernsid=self.flickr_usernsid)
            if page >= int(listing.get('pages')):
                break
            page += 1

    def _download_all(self, photos):
        """Download every Photo from the iterable; return those that failed."""
        items_with_errors = []

        def attempt(photo):
            # The failure is logged and recorded here, so it is not re-raised.
            try:
                self.download_photo(photo)
            except Exception:
                logger.exception("An unexpected error occurred downloading %s (%s)", photo.title, photo.id)
                items_with_errors.append(photo)

        if THREADED:
            thread_pool = threadpool.ThreadPool(self.threadpoolsize)
            for photo in photos:
                thread_pool.putRequest(threadpool.WorkRequest(attempt, [photo], {}))
            thread_pool.wait()
        else:
            for photo in photos:
                attempt(photo)
        return items_with_errors

    def _retry_failures(self, items_with_errors, error_file):
        """Retry failed items, if any, and return the overall success flag."""
        if not items_with_errors:
            return True
        logger.warning("%d items could not be downloaded. Retrying %d times.", len(items_with_errors), self.max_retries)
        return self.retry(items_with_errors, error_file=error_file)

    def retrieve_flickr_token(self):
        """Return ``(flickr_api, user_nsid)`` for an authorised API connection.

        A cached token is checked with a test login and discarded if that
        fails. Without a valid token the user is asked, on the console, to
        visit an authorisation URL and enter the verifier code.
        """
        flickr_api = flickrapi.FlickrAPI(FLICKR_API_KEY, FLICKR_API_SECRET, token_cache_location=self.token_cache)

        # Make sure the token is still valid if we have one
        if flickr_api.token_cache.token:
            try:
                flickr_api.test.login()
            except flickrapi.exceptions.FlickrError:
                flickr_api.flickr_oauth.token = None
                del flickr_api.token_cache.token

        # Get a new token via the user if we don't have one
        if not flickr_api.token_cache.token:
            flickr_api.get_request_token(oauth_callback='oob')
            authorize_url = flickr_api.auth_url(perms='read')
            print("No token found. You must visit this URL and get the verifier code: %s" % authorize_url)
            verifier = input('Enter code: ')
            flickr_api.get_access_token(verifier)

        # Return token information
        flickr_usernsid = flickr_api.token_cache.token.user_nsid
        return (flickr_api, flickr_usernsid)

    def get_photo_sets(self, photo):
        """Return the ``<set>`` elements of all sets the photo belongs to."""
        return self.flickr_api.photos_getAllContexts(photo_id=photo.id).findall('set')

    def normalize_filename(self, filename):
        """Return ``filename`` with disallowed characters replaced by ``_``.

        The names ``.`` and ``..`` are rewritten too, so that the result can
        never refer to the current or parent directory.
        """
        # Take a rather liberal approach to what's an allowable filename
        safe = re.sub(r'[^\w\-_\. \?\'!]', '_', filename)
        if safe in ('.', '..'):
            safe = safe.replace('.', '_')
        return safe

    def get_set_directory(self, set_info):
        """Return the directory for a photo set, creating it if needed."""
        dirname = os.path.join(self.destination, self.normalize_filename(set_info.get('title')))
        # The lock keeps the check and the creation together when several
        # threads ask for the same set directory.
        with dirlock:
            if not os.path.exists(dirname):
                logger.debug("Creating directory %s", dirname)
                os.mkdir(dirname)
        return dirname

    def get_date_directory(self, parent, photo):
        """Return ``parent/<year>/<month>/<day>`` for the photo, creating it.

        The date is taken from the part of ``photo.date_taken`` before the
        first space, which must have the form ``YYYY-MM-DD``.
        """
        date_taken = photo.date_taken.split(' ')[0]
        year, month, day = date_taken.split('-')
        dirname = os.path.join(parent, year, month, day)
        # exist_ok makes this safe when another thread creates the same path.
        os.makedirs(dirname, exist_ok=True)
        return dirname

    def write_metadata(self, photo_filepath, photo):
        """Write the photo's metadata to ``<photo_filepath>.<METADATA_EXTENSION>``."""
        filename = photo_filepath + "." + METADATA_EXTENSION
        with open(filename, 'w', encoding='utf-8') as f:
            print("[Information]", file=f)
            print("id = %s" % photo.id, file=f)
            print("title = %s" % photo.title, file=f)
            print("description = %s" % (photo.description or ""), file=f)
            print("public = %s" % ("yes" if photo.is_public else "no"), file=f)
            print("friends = %s" % ("yes" if photo.is_friend else "no"), file=f)
            print("family = %s" % ("yes" if photo.is_family else "no"), file=f)
            print("taken = %s" % photo.date_taken, file=f)
            print("tags = %s" % ' '.join(photo.tags or []), file=f)

    def retry(self, items_with_errors, error_file=None):
        """Retry the given photos sequentially, up to ``max_retries`` rounds.

        Returns True once every photo has been downloaded. Otherwise the
        remaining ids are logged, appended to ``error_file`` (one per line)
        when given, and False is returned.
        """
        retry_count = 0
        while retry_count < self.max_retries:
            # Retry, this time without threading
            retry_count += 1
            still_in_error = []
            for photo in items_with_errors:
                try:
                    self.download_photo(photo)
                except Exception:
                    logger.exception("An unexpected error occurred downloading %s (%s)", photo.title, photo.id)
                    still_in_error.append(photo)
            items_with_errors = still_in_error
            if not items_with_errors:
                break

        if items_with_errors:
            logger.error("Download of the following items did not succeed, even after %d retries: %s", self.max_retries, ' '.join([photo.id for photo in items_with_errors]))
            if error_file:
                with open(error_file, 'a') as ef:
                    for photo in items_with_errors:
                        print(photo.id, file=ef)
            return False
        return True
#
# CLI
#
def main():
    """Command-line entry point.

    Parses the arguments, configures logging and then either re-downloads the
    ids listed in a file (``--download``) or runs an incremental backup
    starting at ``--from`` or, failing that, at the date in the stamp file.

    Exits with status 2 on unusable input and with status 1 when some items
    could not be downloaded.
    """
    # Process command line arguments
    parser = argparse.ArgumentParser(description='Incremental Flickr backup')
    parser.add_argument('-f', '--from', dest='from_date', help='Start date (YYYY-MM-DD)')
    parser.add_argument('-v', '--verbose', action='store_true', help='Log progress information')
    parser.add_argument('-o', '--store-once', action='store_true', help='Only store photos once, even if they appear in multiple sets')
    parser.add_argument('--favorites', action='store_true', help='Download favorites instead of own photos. Implies --store-once and does not organise photos into folders based on sets.')
    parser.add_argument('-k', '--keep-existing', action='store_true', help='Keep existing photos (default is to replace in case they have changed)')
    parser.add_argument('-r', '--retry', type=int, default=1, help='Retry download of failed images N times (default is to retry once)')
    parser.add_argument('-e', '--error-file', help='Append ids of erroneous items to this file, to allow retry later')
    parser.add_argument('-d', '--download', metavar='FILE', help='Attempt to download the photos with the ids in the given file, one per line (usually saved by the --error-file option)')
    parser.add_argument('-l', '--log-file', help='Log warnings and errors to the given file')
    parser.add_argument('--token-cache', dest='token_cache', help="Path to a directory where the login token data will be stored. Must be secure. Defaults to ~/.flickr")
    parser.add_argument('destination', help='Destination directory')
    arguments = parser.parse_args()

    destination = arguments.destination
    success = False

    # Setup logging
    logger.setLevel(logging.DEBUG if arguments.verbose else logging.INFO)
    logger.propagate = False

    # Console logging
    console_log_handler = logging.StreamHandler()
    console_log_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(console_log_handler)

    # File logging
    if arguments.log_file:
        file_log_handler = logging.FileHandler(arguments.log_file)
        file_log_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s', "%Y-%m-%d %H:%M:%S"))
        logger.addHandler(file_log_handler)

    # Run
    if arguments.download:
        if not os.path.exists(arguments.download):
            logger.error("Download file %s does not exist.", arguments.download)
            sys.exit(2)

        logger.info("Running backup of images found in %s", arguments.download)
        with open(arguments.download, 'r') as f:
            # Blank lines would otherwise become empty photo ids.
            ids = [line.strip() for line in f if line.strip()]

        backup = FlickrBackup(destination,
            store_once=arguments.store_once,
            keep_existing=arguments.keep_existing,
            retry=arguments.retry,
            verbose=arguments.verbose,
            token_cache=arguments.token_cache
        )
        success = backup.download(ids, arguments.error_file)
    else:
        from_date = arguments.from_date

        # Figure out the start date
        stamp_filename = os.path.join(destination, STAMP_FILENAME)
        if not from_date and os.path.exists(stamp_filename):
            with open(stamp_filename, 'r') as stamp:
                from_date = stamp.read().strip()
        if not from_date:
            logger.error("No start date specified and no previous time stamp found in %s.", stamp_filename)
            sys.exit(2)

        # Capture today's date (the script may run for more than one day)
        today = datetime.date.today().isoformat()

        # Run the backup
        logger.info("Running backup of images updated since %s", from_date)
        backup = FlickrBackup(destination,
            store_once=arguments.store_once,
            keep_existing=arguments.keep_existing,
            favorites=arguments.favorites,
            retry=arguments.retry,
            verbose=arguments.verbose,
            token_cache=arguments.token_cache
        )
        success = backup.run(from_date, arguments.error_file)

        # Store today's date. This happens even when some downloads failed;
        # use --error-file to keep the failed ids for a later --download run.
        with open(stamp_filename, 'w') as stamp:
            stamp.write(today)

    if not success:
        sys.exit(1)
    logger.info("Done")


if __name__ == '__main__':
    main()