forked from rtucker/imap2maildir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimap2maildir
executable file
·604 lines (511 loc) · 22.2 KB
/
imap2maildir
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
#!/usr/bin/python
"""
Mirrors the contents of an IMAP4 mailbox into a local maildir or mbox.
Intended for keeping a local backup of a remote IMAP4 mailbox to protect
against loss. Very handy for backing up "[Gmail]/All Mail" from your
Gmail account, to snag all your archived mail. Re-running it on a regular
basis will update only the stuff it needs to.
Once I need to, I'll write a restore script ;-)
Ryan Tucker <[email protected]>
TODO:
PEP-0008 compliance
- Docstrings
"""
# Version banner handed to optparse.OptionParser(version=...); optparse
# replaces "%prog" with the program name when it prints the string.
version = "%prog 1.10.2 20101018"
try:
from ConfigParser import ConfigParser
except ImportError:
from configparser import ConfigParser
import email
import getpass
import hashlib
import logging
import mailbox
import optparse
import os
import re
try:
import rfc822
except ImportError:
import rfc822py3 as rfc822
import simpleimap
import sqlite3
import sys
import time
# Module-wide logger; its records are sent to a plain stream handler.
console = logging.StreamHandler()
log = logging.getLogger(__name__)
log.addHandler(console)

# Built-in settings, used for anything that neither the configuration file
# nor the command line overrides.
defaults = dict(
    debug=1,
    password=False,
    hostname='imap.gmail.com',
    ssl=True,
    port=False,
    remotefolder='[Gmail]/All Mail',
    create=False,
    maxmessages=0,
    configfile='imap2maildir.conf',
    turbo=True,
    type='maildir',
    mboxdash=False,
    search='SEEN',
    oauth_consumer_key=None,
    oauth_consumer_secret=None,
)
class SeenMessagesCache(object):
    """In-memory lookup tables for messages that were already seen.

    Both attributes start out as None.  check_message() treats None as
    "not loaded yet" and replaces it with a dict read from the database.
    """

    def __init__(self):
        """Create a cache whose uid and hash tables are both unloaded."""
        # uids:   str(uid)  -> mailfile, or None until populated
        # hashes: str(hash) -> mailfile, or None until populated
        self.uids = self.hashes = None
class lazyMaildir(mailbox.Maildir):
    """Maildir that re-reads its directories only when they have changed.

    Overrides the _refresh method, based on patch from
    http://bugs.python.org/issue1607951
    by A.M. Kuchling, 2009-05-02
    """

    def __init__(self, dirname, factory=rfc822.Message, create=True):
        """Initialize a lazy Maildir instance.

        dirname, factory and create are handed unchanged to
        mailbox.Maildir.__init__.
        """
        mailbox.Maildir.__init__(self, dirname, factory, create)
        self._last_read = None  # Records the last time we read cur/new

    def _refresh(self):
        """Update table of contents mapping.

        The 'new' and 'cur' directories are listed again only when this is
        the first refresh or one of them has a modification time later than
        the previous scan; otherwise the existing table is kept.
        """
        new_mtime = os.path.getmtime(os.path.join(self._path, 'new'))
        cur_mtime = os.path.getmtime(os.path.join(self._path, 'cur'))

        if (self._last_read is not None and
                new_mtime <= self._last_read and
                cur_mtime <= self._last_read):
            return

        self._toc = {}

        def update_dir(subdir):
            """Add every non-directory entry of subdir to the table."""
            path = os.path.join(self._path, subdir)
            for entry in os.listdir(path):
                if os.path.isdir(os.path.join(path, entry)):
                    continue
                # The key is the file name up to the info separator.
                uniq = entry.split(self.colon)[0]
                self._toc[uniq] = os.path.join(subdir, entry)

        update_dir('new')
        update_dir('cur')

        # We record the current time - 1sec so that, if _refresh() is called
        # again in the same second, we will always re-read the mailbox
        # just in case it's been modified.  (os.path.mtime() only has
        # 1sec resolution.)  This results in a few unnecessary re-reads
        # when _refresh() is called multiple times in the same second,
        # but once the clock ticks over, we will only re-read as needed.
        self._last_read = time.time() - 1
def make_hash(size, date, msgid):
    """Return the hex SHA-1 digest that identifies a message.

    The digest is taken over the text "<size>::<date>::<msgid>", where
    size is formatted as an integer and date and msgid as strings.

    Returns a 40-character hexadecimal string.
    """
    text = '%i::%s::%s' % (size, date, msgid)
    if not isinstance(text, bytes):
        # hashlib only accepts bytes.  Text strings are encoded as UTF-8,
        # which yields the same bytes as before for plain ASCII input, so
        # hashes stored by earlier runs keep matching.
        text = text.encode('utf-8')
    return hashlib.sha1(text).hexdigest()
def open_sql_session(filename):
    """Open the SQLite bookkeeping database, creating or upgrading it.

    A missing seenmessages table is created.  A table from an older
    version that lacks the uid column gets that column added.

    Returns the open sqlite3 connection.
    """
    log.debug("Opening sqlite3 database '%s'" % filename)
    conn = sqlite3.connect(filename)
    cursor = conn.cursor()

    # table_info yields one row per column; field 1 is the column name.
    cursor.execute('pragma table_info(seenmessages)')
    columns = [row[1] for row in cursor.fetchall()]

    if not columns:
        # No such table yet: create it.
        cursor.execute(
            "create table seenmessages\n"
            "(hash text not null unique, mailfile text not null, uid integer)")
    elif 'uid' not in columns:
        # Table written by an old version: add the uid column.
        cursor.execute("alter table seenmessages add column uid integer")

    conn.commit()
    return conn
def check_message(conn, mbox, hash=None, uid=None, seencache=None):
    """Check whether a message is already recorded and present locally.

    conn      -- sqlite3 connection holding the seenmessages table
    mbox      -- destination mailbox the recorded file is looked up in
    hash      -- message hash to look up; takes precedence over uid
    uid       -- uid to look up when no hash is given
    seencache -- optional SeenMessagesCache.  Its tables are loaded from
                 the database on first use and consulted before the
                 database.  Without it every lookup goes to the database.

    Returns False when neither hash nor uid is given or no row matches.
    Returns True for rows carrying a 'POISON-' placeholder file name.
    Otherwise returns the result of looking the recorded file up in mbox:
    a bool for mbox and unknown mailbox types, and whatever mbox.get()
    returns (a message or None) for lazyMaildir.
    """
    c = conn.cursor()

    if seencache:
        if seencache.hashes is None:
            # Populate the hash cache
            log.debug("Populating hash cache...")
            seencache.hashes = {}
            c.execute('select hash,mailfile from seenmessages')
            for result in c:
                seencache.hashes[str(result[0])] = result[1]
            log.debug("Hash cache: %i hashes" % len(seencache.hashes))
        if seencache.uids is None:
            # Populate the uid cache
            log.debug("Populating uid cache...")
            seencache.uids = {}
            c.execute('select uid,mailfile from seenmessages')
            for result in c:
                seencache.uids[str(result[0])] = result[1]
            log.debug("Uid cache: %i uids" % len(seencache.uids))
        cached_hashes = seencache.hashes
        cached_uids = seencache.uids
    else:
        # No cache supplied: fall straight through to the database.
        cached_hashes = cached_uids = None

    if hash:
        if cached_hashes is not None and str(hash) in cached_hashes:
            # Cache keys are always str, so index with the same key that
            # was used for the membership test.
            mailfile = cached_hashes[str(hash)]
        else:
            c.execute('select mailfile from seenmessages where hash=?',
                      (hash,))
            row = c.fetchone()
            if not row:
                return False
            if cached_hashes is not None:
                log.debug("Cache miss on hash %s", hash)
            mailfile = row[0]
    elif uid:
        if cached_uids is not None and str(uid) in cached_uids:
            mailfile = cached_uids[str(uid)]
        else:
            c.execute('select mailfile from seenmessages where uid=?',
                      (uid,))
            row = c.fetchone()
            if not row:
                return False
            if cached_uids is not None:
                log.debug("Cache miss on uid %s" % uid)
            mailfile = row[0]
    else:
        return False

    if str(mailfile).startswith('POISON-'):
        # This is a fake poison filename!  Assume truth.
        log.warning("Poison filename detected; assuming the message "
                    "exists and all is well: %s :: %s",
                    hash or uid, mailfile)
        return True
    elif isinstance(mbox, mailbox.mbox):
        # mailfile will be an int
        return int(mailfile) in mbox
    elif isinstance(mbox, lazyMaildir):
        # mailfile will be a string; use mbox.get because it is faster
        return mbox.get(mailfile)
    else:
        # uhh let's wing it
        return mailfile in mbox
def store_hash(conn, hash, mailfile, uid):
    """Record that the message with this hash lives in mailfile.

    Any row already stored for the hash is replaced, and the change is
    committed before returning.
    """
    cursor = conn.cursor()

    # A row for this hash may already exist (e.g. the file on disk went
    # away); drop it first so the insert does not violate uniqueness.
    removed = cursor.execute('delete from seenmessages where hash = ?',
                             (hash, )).rowcount
    if removed > 0:
        log.debug('!!! Nuked duplicate hash %s' % hash)

    cursor.execute('insert into seenmessages values (?,?,?)',
                   (hash, mailfile, uid))
    conn.commit()
def add_uid_to_hash(conn, hash, uid):
    """Set the uid on the stored row for hash and commit the change."""
    statement = 'update seenmessages set uid = ? where hash = ?'
    conn.cursor().execute(statement, (uid, hash))
    conn.commit()
def open_mailbox_maildir(directory, create=False):
    """Open the maildir at directory and return it as a lazyMaildir.

    create is passed on to lazyMaildir.
    """
    maildir = lazyMaildir(directory, create=create)
    return maildir
def open_mailbox_mbox(filename, create=False):
    """Open the mbox file at filename and lock it.

    create is passed on to mailbox.mbox.  The mailbox is returned still
    locked; the caller is responsible for unlocking it.
    """
    locked_box = mailbox.mbox(filename, create=create)
    locked_box.lock()
    return locked_box
def smells_like_maildir(working_dir):
    """Tell whether working_dir contains cur, new and tmp entries.

    Only existence is checked, in that order, stopping at the first entry
    that is missing.
    """
    for subdir in ('cur', 'new', 'tmp'):
        if not os.path.exists(os.path.join(working_dir, subdir)):
            return False
    return True
def parse_config_file(defaults, configfile='imap2maildir.conf'):
    """Load configfile on top of defaults, if the file can be read.

    Returns a (config, was_read) tuple: config is a ConfigParser seeded
    with defaults, and was_read is True when configfile was read and
    False otherwise.
    """
    config = ConfigParser(defaults)
    # read() returns the list of files it managed to parse.
    was_read = bool(config.read(configfile))

    if was_read:
        log.debug('Reading config from ' + configfile)
    else:
        log.debug('No config found at ' + configfile)

    return (config, was_read)
class FirstOptionParser(optparse.OptionParser):
    """OptionParser that skips options it does not know about.

    parse_args is adapted from /usr/lib/python2.6/optparse.py so that an
    unknown option no longer aborts parsing.
    """

    def parse_args(self, args=None, values=None):
        """
        parse_args(args : [string] = sys.argv[1:],
                   values : Values = None)
        -> (values : Values, args : [string])

        Parse the command-line options found in 'args' (default:
        sys.argv[1:]).  Options unknown to this parser are dropped and
        parsing carries on with the remaining arguments.  An invalid
        option value results in a call to 'error()', which by default
        prints the usage message to stderr and calls sys.exit() with an
        error message.  On success returns a pair (values, args) where
        'values' is a Values instance (with all your option values) and
        'args' is the list of arguments left over after parsing options.
        """
        rargs = self._get_args(args)
        if values is None:
            values = self.get_default_values()

        self.rargs = rargs
        self.largs = largs = []
        self.values = values

        while True:
            try:
                self._process_args(largs, rargs, values)
            except optparse.BadOptionError:
                # The offending option was already consumed from rargs;
                # resume with whatever is left.
                continue
            except optparse.OptionValueError as err:
                self.error(str(err))
            else:
                break

        leftover = largs + rargs
        return self.check_values(values, leftover)
def parse_options(defaults):
    """Combine built-in defaults, the config file and the command line.

    Parsing happens in two rounds.  The first round only looks for
    -c/--config-file so the right configuration file can be loaded.  The
    second round builds the full parser, seeds its defaults from the
    configuration and parses the real command line.

    defaults -- dict of built-in settings; must contain 'configfile'.

    Returns the optparse Values object with the effective options.  A
    missing username or destination, or an unusable destination, is
    reported through parser.error().  The user is prompted for a password
    when neither a password nor an OAuth consumer key is configured.  The
    module logger's level is set from options.debug as a side effect.
    """
    # First round of command line parsing: look for a -c option.
    firstparser = FirstOptionParser(add_help_option=False)
    firstparser.set_defaults(configfile=defaults['configfile'])
    firstparser.add_option("-c", "--config-file", dest="configfile")
    firstoptions = firstparser.parse_args()[0]

    # Parse a config file
    (parsedconfig, gotconfig) = parse_config_file(
        defaults, configfile=firstoptions.configfile)

    # Parse command line options
    usage = "usage: %prog [options]"
    description = (
        "A script to copy a remote IMAP folder to a local mail "
        "storage area. Ideal for incremental backups of mail "
        "from free webmail providers, or perhaps as an "
        "alternative to fetchmail. Supports mbox and maildir, "
        "despite the name. "
        "See COPYRIGHT for your rights; "
        "https://github.com/rtucker/imap2maildir/ for info.")
    if gotconfig:
        description = description + (
            '\n\nConfiguration defaults read from file "%s"'
            % firstoptions.configfile)
    parser = optparse.OptionParser(usage=usage, version=version,
                                   description=description)

    # Set up some groups
    required = optparse.OptionGroup(parser, "Required options")
    optional = optparse.OptionGroup(parser, "Optional and debugging options")

    # Set the defaults...
    if gotconfig and parsedconfig.has_section('imap2maildir'):
        sectionname = 'imap2maildir'
    else:
        if gotconfig:
            # The file was read but lacks the expected section; fall back
            # to the defaults instead of dying with NoSectionError.
            log.warning("Config file '%s' has no [imap2maildir] section; "
                        "using defaults", firstoptions.configfile)
        sectionname = 'DEFAULT'

    for iname, rawvalue in parsedconfig.items(sectionname, raw=True):
        # Values read from a file arrive as text: map the boolean
        # spellings and the numeric settings back to real types.
        if rawvalue == 'False':
            ivalue = False
        elif rawvalue == 'True':
            ivalue = True
        elif iname in ('port', 'debug', 'maxmessages'):
            ivalue = int(rawvalue)
        else:
            ivalue = rawvalue
        parser.set_default(iname, ivalue)

    # Define the individual options
    required.add_option("-u", "--username", dest="username",
        help="Username for authentication to IMAP server", metavar="USERNAME")
    required.add_option("-d", "--destination", dest="destination",
        help="Where to store the mail, e.g. ~/Backups/Gmail",
        metavar="PATH")
    optional.add_option("-p", "--password", dest="password",
        help="Password for IMAP server. Default: prompt user",
        metavar="PASSWORD")
    optional.add_option("-H", "--hostname", dest="hostname",
        help="Hostname of IMAP server, default: %default", metavar="HOSTNAME")
    # type="int" keeps a command-line port consistent with a port taken
    # from the config file, which is converted with int() above.
    optional.add_option("-P", "--port", dest="port",
        help="Port number. Default: 993 (SSL), 143 (clear)", metavar="PORT",
        type="int")
    optional.add_option("-v", "--verbose", dest="debug",
        help="Turns up the verbosity", action="store_const", const=2)
    optional.add_option("-q", "--quiet", dest="debug",
        help="Quiets all output (except prompts and errors)",
        action="store_const", const=0)
    optional.add_option("-r", "--remote-folder", dest="remotefolder",
        help="Remote IMAP folder. Default: %default",
        metavar="FOLDERNAME")
    optional.add_option("-s", "--search", dest="search",
        help="IMAP4 search criteria to use. Default: %default",
        metavar="CRITERIA")
    optional.add_option("--create", dest="create",
        help="If --destination doesn't exist, create it", action="store_true")
    optional.add_option("--no-turbo", "-T", dest="turbo",
        help="Check for message locally before asking IMAP. Default: %default",
        action="store_false")
    optional.add_option("-m", "--max-messages", dest="maxmessages",
        help="How many messages to process in one run (0=infinite). " +
        "Default: %default",
        metavar="MAX", type="int")
    optional.add_option("-c", "--config-file", dest="configfile",
        help="Configuration file to use. Default: %default")
    optional.add_option("-S", "--ssl", dest="ssl",
        help="Use SSL to connect, default: %default", action="store_true")
    optional.add_option("-t", "--type", dest="type", action="store",
        help="Mailbox type. Choice of: maildir, mbox. Default: %default",
        choices=['maildir', 'mbox'])
    optional.add_option("--mboxdash", dest="mboxdash", action="store_true",
        help="Use - in the mbox From line instead of sender's address. " +
        "Default: %default")
    optional.add_option("--oauth_consumer_key", dest="oauth_consumer_key",
        help="Two legged OAuth domain consumer key")
    optional.add_option("--oauth_consumer_secret", dest="oauth_consumer_secret",
        help="Two legged OAuth domain consumer secret")

    # Parse
    parser.add_option_group(required)
    parser.add_option_group(optional)
    (options, args) = parser.parse_args()

    # Check for required options
    if not options.username:
        parser.error("Must specify a username (-u/--username).")
    if not options.destination:
        parser.error("Must specify a destination directory (-d/--destination).")
    if not os.path.exists(options.destination):
        if not options.create:
            parser.error("Destination '%s' does not exist. Use --create."
                         % options.destination)
    elif (options.type == 'maildir'
            and not smells_like_maildir(options.destination)):
        parser.error("Directory '%s' exists, but it isn't a maildir."
                     % options.destination)

    # Only prompt when no password was given and OAuth is not in use.
    if not options.password and options.oauth_consumer_key is None:
        options.password = getpass.getpass()

    # Set up debugging
    if options.debug == 0:
        log.setLevel(logging.ERROR)
    elif options.debug == 1:
        log.setLevel(logging.INFO)
    else:
        log.setLevel(logging.DEBUG)

    return options
def copy_messages_by_folder(folder, db, imap, mbox, limit=0, turbo=False,
                            mboxdash=False, search=None, seencache=None):
    """Copies any messages that haven't yet been seen from imap to mbox.

    copy_messages_by_folder(folder=simpleimap.SimpleImapSSL().Folder(),
                            db=open_sql_session(),
                            imap=simpleimap.SimpleImapSSL(),
                            mbox=open_mailbox_*(),
                            limit=max number of messages to handle (0 = inf),
                            turbo=boolean,
                            mboxdash=use '-' for mbox From line email?,
                            search=imap criteria (string),
                            seencache=an object to cache seen messages,

    Copying stops early when a message cannot be retrieved or when limit
    messages have been handled.  If the very first message handled in a
    run cannot be retrieved, its hash is stored under a 'POISON-' file
    name so that later runs skip it.

    Returns: {'total': total length of folder,
              'handled': total messages handled,
              'copied': total messages copied,
              'copiedbytes': size of total messages copied,
              'turbo': value of folder.turbocounter() after the run,
              'lastuid': last UID seen}
    """
    outdict = {'turbo': 0, 'handled': 0, 'copied': 0, 'copiedbytes': 0,
               'lastuid': 0}
    outdict['total'] = len(folder)
    log.info("Synchronizing %i messages from %s:%s to %s..."
             % (outdict['total'], folder.host, folder.folder, mbox._path))

    if turbo:
        # This will pass the check_message function and some useful cargo
        # along to the Summaries() function in the FolderClass.  It will
        # use this to check the local cache for the message before hitting
        # the outside world.  (TODO: Make this less suckful.)
        log.debug('TURBO MODE ENGAGED!')
        folder.__turbo__(lambda uid: check_message(db, mbox, uid=str(uid),
                                                   seencache=seencache))
    else:
        log.debug('Not using turbo mode...')
        folder.__turbo__(None)

    # Iterate through the message summary dicts for the folder.
    for i in folder.Summaries(search=search):
        # i = {'uid': , 'msgid': , 'size': , 'date': }
        # Seen it yet?
        msghash = make_hash(i['size'], i['date'], i['msgid'])

        if not check_message(db, mbox, hash=msghash, seencache=seencache):
            # Hash not found, copy it.
            try:
                message = imap.get_message_by_uid(i['uid'])
            except Exception:
                log.exception('ERROR: Could not retrieve message: %s'
                              % repr(i))
                if outdict['handled'] < 1:
                    # Nothing was handled before this failure, so a rerun
                    # would hit the same message again: mark it as seen.
                    log.error("Adding message hash %s to seencache, to avoid "
                              "future problems...", msghash)
                    store_hash(db, msghash, 'POISON-%s' % msghash, i['uid'])
                    add_uid_to_hash(db, msghash, i['uid'])
                break

            if mboxdash:
                envfrom = '-'
            else:
                envfrom = i['envfrom']

            message.set_unixfrom("From %s %s" % (envfrom,
                time.asctime(imap.parseInternalDate(i['date']))))
            msgfile = mbox.add(message)
            store_hash(db, msghash, msgfile, i['uid'])
            log.debug(' NEW: ' + repr(i))
            outdict['copied'] += 1
            outdict['copiedbytes'] += i['size']

        elif not check_message(db, mbox, uid=str(i['uid']),
                               seencache=seencache):
            # UID is missing in the database (old version needs updated)
            log.debug('Adding uid %i to msghash %s', i['uid'], msghash)
            add_uid_to_hash(db, msghash, i['uid'])

        else:
            log.debug('Unexpected turbo mode on uid %i', i['uid'])

        # Update our counters.
        outdict['handled'] += 1
        outdict['turbo'] = folder.turbocounter()
        # Recorded for every message so the result really is the last UID
        # seen, even when the run ends between two progress reports.
        outdict['lastuid'] = i['uid']

        if outdict['handled'] % 100 == 0:
            percentage = ((outdict['handled'] + outdict['turbo'])
                          / float(outdict['total'])) * 100
            log.info('Copied: %i, Turbo: %i, Seen: %i (%i%%, latest UID %i, date %s)' %
                     (outdict['copied'], outdict['turbo'], outdict['handled'],
                      percentage, i['uid'], i['date']))

        if (outdict['handled'] >= limit) and (limit > 0):
            log.info('Limit of %i messages reached' % limit)
            break

    # Make sure this gets updated...
    outdict['turbo'] = folder.turbocounter()
    return outdict
def _abort_cleanup(mbox, db):
    """Unlock the mailbox and roll back and close the database, if opened."""
    if mbox is not None:
        mbox.unlock()
    if db is not None:
        db.rollback()
        db.close()


def main():
    """Run one synchronisation from the IMAP folder to the local mailbox.

    Reads the options, connects to the IMAP server, opens the destination
    mailbox and its SQLite database, copies the messages and logs a
    summary.  On an interrupt or error the mailbox is unlocked, the
    database is rolled back and the exception is re-raised.

    Raises ValueError when the mailbox type is neither 'maildir' nor
    'mbox'.
    """
    log.debug('Hello. Version %s' % version)

    # Parse the command line and config file
    options = parse_options(defaults)

    # Check to make sure the mailbox type is valid (probably redundant)
    if options.type not in ['maildir', 'mbox']:
        raise ValueError("No valid mailbox type specified")

    # Connect to IMAP server
    imapserver = simpleimap.Server(hostname=options.hostname,
        username=options.username, password=options.password,
        port=options.port, ssl=options.ssl,
        oauth_consumer_key=options.oauth_consumer_key,
        oauth_consumer_secret=options.oauth_consumer_secret)
    imap = imapserver.Get()

    # Instantiate a folder
    folder = imap.Folder(folder=options.remotefolder)
    folder.__keepaliver__(imapserver.Keepalive)

    # Bound up front so the handlers below can tell what was opened; a
    # failure while opening must not be masked by a NameError in cleanup.
    mbox = None
    db = None

    # Open mailbox and database, and copy messages
    try:
        if options.type == 'maildir':
            mbox = open_mailbox_maildir(options.destination, options.create)
            db = open_sql_session(
                os.path.join(options.destination, '.imap2maildir.sqlite'))
        elif options.type == 'mbox':
            mbox = open_mailbox_mbox(options.destination, options.create)
            db = open_sql_session(options.destination + '.sqlite')

        seencache = SeenMessagesCache()
        result = copy_messages_by_folder(folder=folder,
                                         db=db,
                                         imap=imap,
                                         mbox=mbox,
                                         limit=options.maxmessages,
                                         turbo=options.turbo,
                                         mboxdash=options.mboxdash,
                                         search=options.search,
                                         seencache=seencache)
    except (KeyboardInterrupt, SystemExit):
        log.warning('Caught interrupt; clearing locks and safing database.')
        _abort_cleanup(mbox, db)
        raise
    except Exception:
        log.exception('Exception! Clearing locks and safing database.')
        _abort_cleanup(mbox, db)
        raise

    # Unlock the mailbox if locked.
    mbox.unlock()
    # Every database write was committed by the helper that made it.
    db.close()

    # Print results.
    log.info('FINISHED: Turboed %(turbo)i, handled %(handled)i, copied %(copied)i (%(copiedbytes)i bytes), last UID was %(lastuid)i' % result)
# Run the synchronisation only when executed as a script, not on import.
if __name__ == "__main__":
    main()