-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtoglacier.go
658 lines (547 loc) · 21.4 KB
/
toglacier.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
package toglacier
import (
"context"
"fmt"
"net/smtp"
"os"
"regexp"
"sort"
"strings"
"time"
"github.com/pkg/errors"
"github.com/rafaeljusto/toglacier/internal/archive"
"github.com/rafaeljusto/toglacier/internal/cloud"
"github.com/rafaeljusto/toglacier/internal/log"
"github.com/rafaeljusto/toglacier/internal/report"
"github.com/rafaeljusto/toglacier/internal/storage"
)
// ToGlacier manages backups in the cloud. It groups the collaborators used by
// the operations of this file: archive building, encryption, cloud transfer,
// local tracking and logging.
type ToGlacier struct {
// Context is forwarded to every Cloud call (Send, List, Get and Remove).
Context context.Context
// Archive builds and extracts the backup archives and calculates file
// checksums.
Archive archive.Archive
// Envelop encrypts and decrypts the archives when a backup secret is informed.
Envelop archive.Envelop
// Cloud sends, lists, retrieves and removes backups from the remote service.
Cloud cloud.Cloud
// Storage saves, lists and removes the backup information tracked locally.
Storage storage.Storage
// Logger receives the debug, info and warning messages of the operations.
Logger log.Logger
}
// Backup creates an archive from backupPaths and sends it to the cloud. When
// backupSecret is not blank the archive is encrypted with it before the upload,
// otherwise no encryption is performed. The backup can be aborted when there
// are too many modified files (ransomware detection): modifyTolerance is the
// tolerated percentage (0 - 100) of modified files, and the values 0 or 100
// disable this verification. Files or directories inside the backup paths can
// be skipped with the regular expressions in ignorePatterns.
//
// The build, encrypt and send durations, the uploaded backup and most of the
// errors are registered in a report that is added when the function returns.
func (t ToGlacier) Backup(backupPaths []string, backupSecret string, modifyTolerance float64, ignorePatterns []*regexp.Regexp) error {
	backupReport := report.NewSendBackup()
	defer func() {
		report.Add(backupReport)
	}()

	// the archive information of the latest backup is the base to detect which
	// files changed
	localBackups, err := t.ListBackups(false)
	if err != nil {
		return errors.WithStack(err)
	}

	var info archive.Info
	if len(localBackups) > 0 {
		// local backups are listed from the newest to the oldest
		info = localBackups[0].Info
	}

	buildStart := time.Now()
	filename, info, err := t.Archive.Build(info, ignorePatterns, backupPaths...)
	if err != nil {
		backupReport.Errors = append(backupReport.Errors, err)
		return errors.WithStack(err)
	}
	backupReport.Durations.Build = time.Since(buildStart)

	if filename == "" {
		// an empty filename means that no file was added and the tarball wasn't
		// created, so there is nothing to upload
		return nil
	}
	defer os.Remove(filename)

	if t.modifyToleranceReached(info, modifyTolerance) {
		return errors.WithStack(newError(backupPaths, ErrorCodeModifyTolerance, nil))
	}

	if backupSecret != "" {
		encryptStart := time.Now()

		var encryptedFilename string
		encryptedFilename, err = t.Envelop.Encrypt(filename, backupSecret)
		if err != nil {
			backupReport.Errors = append(backupReport.Errors, err)
			return errors.WithStack(err)
		}
		backupReport.Durations.Encrypt = time.Since(encryptStart)

		// the encrypted content takes the place of the plain archive
		err = os.Rename(encryptedFilename, filename)
		if err != nil {
			backupReport.Errors = append(backupReport.Errors, err)
			return errors.WithStack(err)
		}
	}

	sendStart := time.Now()
	backupReport.Backup, err = t.Cloud.Send(t.Context, filename)
	if err != nil {
		backupReport.Errors = append(backupReport.Errors, err)
		return errors.WithStack(err)
	}
	backupReport.Durations.Send = time.Since(sendStart)

	// new and modified files now belong to the backup that was just uploaded
	for path, itemInfo := range info {
		if !itemInfo.Status.Useful() {
			continue
		}
		itemInfo.ID = backupReport.Backup.ID
		info[path] = itemInfo
	}

	err = t.Storage.Save(storage.Backup{Backup: backupReport.Backup, Info: info})
	if err != nil {
		backupReport.Errors = append(backupReport.Errors, err)
		return errors.WithStack(err)
	}
	return nil
}
// modifyToleranceReached reports whether the percentage of items with the
// modified status in archiveInfo is greater than modifyTolerance, logging a
// warning when it is. The verification is skipped (false is returned) when the
// archive information is empty or when modifyTolerance is 0 or 100.
func (t ToGlacier) modifyToleranceReached(archiveInfo archive.Info, modifyTolerance float64) bool {
	total := len(archiveInfo)
	if total == 0 || modifyTolerance == 0 || modifyTolerance == 100 {
		return false
	}

	modified := 0
	for _, itemInfo := range archiveInfo {
		if itemInfo.Status == archive.ItemInfoStatusModified {
			modified++
		}
	}

	modifyPercentage := float64(modified*100) / float64(total)
	reached := modifyPercentage > modifyTolerance
	if reached {
		t.Logger.Warningf("toglacier: detected %.2f%% of modified files (%d/%d), tolerance limited at %.2f%%, aborting backup",
			modifyPercentage, modified, total, modifyTolerance)
	}
	return reached
}
// ListBackups returns the current backups ordered from the newest to the
// oldest. When remote is false only the backups tracked in the local storage
// are listed; when it is true the cloud inventory is retrieved and the local
// storage is synchronized with it.
func (t ToGlacier) ListBackups(remote bool) (storage.Backups, error) {
	if !remote {
		local, err := t.Storage.List()
		if err != nil {
			return nil, errors.WithStack(err)
		}

		sort.Sort(backupsByCreationDate(local))
		return local, nil
	}

	return t.listRemoteBackups()
}
// listRemoteBackups retrieves the backups known by the cloud and rebuilds the
// local storage from them. Local backups created in the last 24 hours are
// preserved even when they are missing remotely, and the archive information
// already stored locally is reused for the remote backups with the same ID.
// The result is ordered from the newest to the oldest backup. The list
// duration and any error are registered in a report that is added when the
// function returns.
func (t ToGlacier) listRemoteBackups() (storage.Backups, error) {
	listBackupsReport := report.NewListBackups()
	defer func() {
		report.Add(listBackupsReport)
	}()

	listStart := time.Now()
	remoteBackups, err := t.Cloud.List(t.Context)
	if err != nil {
		listBackupsReport.Errors = append(listBackupsReport.Errors, err)
		return nil, errors.WithStack(err)
	}
	listBackupsReport.Durations.List = time.Since(listStart)

	// the local backups are loaded only after the remote ones: the remote
	// operation can take a while, and a concurrent action could change the local
	// backups in the meantime
	localBackups, err := t.Storage.List()
	if err != nil {
		listBackupsReport.Errors = append(listBackupsReport.Errors, err)
		return nil, errors.WithStack(err)
	}

	// http://docs.aws.amazon.com/amazonglacier/latest/dev/working-with-archives.html#client-side-key-map-concept
	//
	// Amazon Glacier maintains a vault inventory with the archive IDs and the
	// descriptions informed during the upload, which can be downloaded
	// occasionally to reconcile the client-side database. The inventory is taken
	// approximately daily, so a request returns the last prepared inventory, a
	// point in time snapshot.

	// TODO: if the change is greater than 20% something is really wrong, and
	// maybe the best approach is to do nothing and report the problem.

	var recentIDs []string
	for _, local := range localBackups {
		// http://docs.aws.amazon.com/amazonglacier/latest/dev/vault-inventory.html#vault-inventory-about
		//
		// The vault inventory is updated approximately once a day and it is not
		// real-time data; after the first inventory is created it typically takes
		// half a day and up to a day before it is available for retrieval. For
		// this reason a backup created in the last 24 hours could not be in the
		// inventory yet, and it is not removed from the local storage.
		isRecent := local.Backup.CreatedAt.After(time.Now().Add(-24 * time.Hour))
		if !isRecent {
			if err := t.Storage.Remove(local.Backup.ID); err != nil {
				listBackupsReport.Errors = append(listBackupsReport.Errors, err)
				return nil, errors.WithStack(err)
			}
			continue
		}

		recentIDs = append(recentIDs, local.Backup.ID)
		t.Logger.Debugf("toglacier: backup id “%s” kept because is to recent", local.Backup.ID)
	}

	// sorted to allow the binary search below
	sort.Strings(recentIDs)

	syncBackups := make(storage.Backups, 0, len(remoteBackups))
	for _, remoteBackup := range remoteBackups {
		// a recent backup that already appeared in the inventory is replaced by
		// the remote version
		pos := sort.SearchStrings(recentIDs, remoteBackup.ID)
		if pos < len(recentIDs) && recentIDs[pos] == remoteBackup.ID {
			if err := t.Storage.Remove(recentIDs[pos]); err != nil {
				listBackupsReport.Errors = append(listBackupsReport.Errors, err)
				return nil, errors.WithStack(err)
			}
			t.Logger.Debugf("toglacier: backup id “%s” removed because it was found remotely", recentIDs[pos])
			recentIDs = append(recentIDs[:pos], recentIDs[pos+1:]...)
		}

		// the archive information is preserved so incremental backups can be
		// built again. Rebuilding it from the uploaded backup is possible but
		// really slow. Anyway, when retrieving a backup without archive
		// information, it is extracted from the backup itself
		var archiveInfo archive.Info
		for _, local := range localBackups {
			if local.Backup.ID == remoteBackup.ID {
				archiveInfo = local.Info
				break
			}
		}

		synced := storage.Backup{
			Backup: remoteBackup,
			Info:   archiveInfo,
		}
		syncBackups = append(syncBackups, synced)

		if err := t.Storage.Save(synced); err != nil {
			listBackupsReport.Errors = append(listBackupsReport.Errors, err)
			return nil, errors.WithStack(err)
		}
	}

	// recent backups that are still missing in the inventory are part of the
	// result too
	for _, recentID := range recentIDs {
		if local, found := localBackups.Search(recentID); found {
			syncBackups = append(syncBackups, local)
		}
	}

	sort.Sort(backupsByCreationDate(syncBackups))
	return syncBackups, nil
}
// RetrieveBackup recovers a specific backup from the cloud. If the backup is
// encrypted it is decrypted with backupSecret when it is informed. With the
// skipUnmodified flag, files whose checksum on disk still matches the archive
// information are not retrieved, which can avoid downloading backup parts that
// contain only unmodified files.
//
// When the local storage has no archive information for the backup, the
// backup identified by id is downloaded first and its archive information is
// used to find the other backup parts that are needed. It returns the first
// error found while listing, downloading, decrypting, extracting or saving.
func (t ToGlacier) RetrieveBackup(id, backupSecret string, skipUnmodified bool) error {
	backups, err := t.Storage.List()
	if err != nil {
		return errors.WithStack(err)
	}

	selectedBackup, ok := backups.Search(id)
	if !ok {
		t.Logger.Warningf("toglacier: backup “%s” not found in local storage", id)
	}

	var ignoreMainBackup bool
	if selectedBackup.Info == nil {
		var filenames map[string]string

		// when there's no archive information, retrieve only the desired backup ID.
		// We will extract the archive information saved in the backup to detect all
		// other backup parts that we need. This is important when the local storage
		// got corrupted due to a disaster
		if filenames, err = t.Cloud.Get(t.Context, id); err != nil {
			return errors.WithStack(err)
		}

		// there's only one backup downloaded at this point
		if selectedBackup.Info, err = t.decryptAndExtract(backupSecret, filenames[id], nil); err != nil {
			return errors.WithStack(err)
		}

		// synchronize the archive information in the local storage only if the
		// backup exists
		if selectedBackup.Backup.ID != "" {
			if err = t.Storage.Save(selectedBackup); err != nil {
				return errors.WithStack(err)
			}
		}

		// as we already downloaded the main backup, we should avoid downloading it
		// again when retrieving the backup parts
		ignoreMainBackup = true
	}

	ids, idPaths, err := t.extractIDs(id, selectedBackup.Info, ignoreMainBackup, skipUnmodified)
	if err != nil {
		return errors.WithStack(err)
	}

	filenames, err := t.Cloud.Get(t.Context, ids...)
	if err != nil {
		return errors.WithStack(err)
	}

	for partID, filename := range filenames {
		if selectedBackup, ok = backups.Search(partID); !ok {
			t.Logger.Warningf("toglacier: backup “%s” not found in local storage", partID)
		}

		// only the paths that reference this backup part are extracted
		if selectedBackup.Info, err = t.decryptAndExtract(backupSecret, filename, idPaths[partID]); err != nil {
			return errors.WithStack(err)
		}

		if err = t.synchronizeArchiveInfo(selectedBackup, backups); err != nil {
			return errors.WithStack(err)
		}
	}

	return nil
}
// extractIDs groups the paths of archiveInfo by the ID of the backup that
// stores each one. It returns the IDs that must be downloaded (in no
// particular order) and, for each ID, the paths to extract from it. Deleted
// items are always skipped; items stored in the backup id are skipped when
// ignoreMainBackup is set; and, when skipUnmodified is set, items whose
// current checksum on disk equals the one in the archive information are
// skipped too. An error is returned when a checksum cannot be calculated.
func (t ToGlacier) extractIDs(id string, archiveInfo archive.Info, ignoreMainBackup, skipUnmodified bool) (ids []string, idPaths map[string][]string, err error) {
	idPaths = make(map[string][]string)

	for path, itemInfo := range archiveInfo {
		// there's no reason to download a backup part just to retrieve a removed
		// file
		if itemInfo.Status == archive.ItemInfoStatusDeleted {
			continue
		}

		// the main backup was already downloaded
		if ignoreMainBackup && itemInfo.ID == id {
			continue
		}

		if skipUnmodified {
			var checksum string
			checksum, err = t.Archive.FileChecksum(path)
			if err != nil {
				return nil, nil, errors.WithStack(err)
			}

			// the file on disk is the same one that was stored in this backup
			if checksum == itemInfo.Checksum {
				t.Logger.Infof("toglacier: file “%s” unmodified in disk since backup, it will be ignored", path)
				continue
			}
		}

		idPaths[itemInfo.ID] = append(idPaths[itemInfo.ID], path)
	}

	for backupID := range idPaths {
		ids = append(ids, backupID)
	}
	return ids, idPaths, nil
}
// decryptAndExtract extracts the archive stored in filename and returns the
// archive information obtained from the extraction. When backupSecret is not
// empty the file is decrypted first and the decrypted content replaces the
// original file. The filter is forwarded to the archive extraction. The
// archive file is removed after the extraction; a failure to remove it is
// only logged.
func (t ToGlacier) decryptAndExtract(backupSecret, filename string, filter []string) (archive.Info, error) {
	var err error

	if backupSecret != "" {
		var decryptedFilename string
		decryptedFilename, err = t.Envelop.Decrypt(filename, backupSecret)
		if err != nil {
			return nil, errors.WithStack(err)
		}

		err = os.Rename(decryptedFilename, filename)
		if err != nil {
			return nil, errors.WithStack(err)
		}
	}

	archiveInfo, err := t.Archive.Extract(filename, filter)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	// the archive is useless once its content was extracted, but a problem
	// removing it should not stop the process
	if removeErr := os.Remove(filename); removeErr != nil {
		t.Logger.Warningf("toglacier: failed to remove file “%s”. details: %s", filename, removeErr)
	}

	return archiveInfo, nil
}
// synchronizeArchiveInfo saves backup in the local storage when it has an ID
// and its archive information is considered valid by backups. Otherwise
// nothing is saved and nil is returned.
func (t ToGlacier) synchronizeArchiveInfo(backup storage.Backup, backups storage.Backups) error {
	switch {
	case backup.Backup.ID == "":
		// the archive information is synchronized only if the backup exists in
		// the local storage
		return nil

	case !backups.ValidInfo(backup.Info):
		// some local actions aren't synchronized with the cloud. For example,
		// removing a backup replaces the file references of the removed backup in
		// other backups, so the archive information stored in the cloud gets
		// outdated. All references of the remote archive information must be
		// valid before it replaces the local version
		t.Logger.Warningf("toglacier: archive information from backup “%s” is outdated and will not be synchronized", backup.Backup.ID)
		return nil
	}

	return errors.WithStack(t.Storage.Save(backup))
}
// RemoveBackups deletes the backups identified by ids from the cloud and from
// the local storage, one at a time, stopping at the first error. References
// to a removed backup that exist in other backups are replaced when possible,
// or removed otherwise. When replacing, the file version right before the
// removed backup date is preferred.
func (t ToGlacier) RemoveBackups(ids ...string) error {
	for i := range ids {
		if err := t.removeBackup(ids[i]); err != nil {
			return errors.WithStack(err)
		}
	}

	return nil
}
// removeBackup removes a single backup: first from the cloud, then its
// references in the other local backups are rearranged, and finally it is
// removed from the local storage. The first error found stops the process.
func (t ToGlacier) removeBackup(id string) error {
	err := t.Cloud.Remove(t.Context, id)
	if err != nil {
		return errors.WithStack(err)
	}

	// TODO: an error in any of the following steps will cause an inconsistency
	// between the cloud and the local storage
	err = t.rearrangeStorage(id)
	if err != nil {
		return errors.WithStack(err)
	}

	err = t.Storage.Remove(id)
	if err != nil {
		return errors.WithStack(err)
	}

	return nil
}
// rearrangeStorage keeps the local storage consistent before the backup id is
// removed: references to it found in newer backups are replaced by the most
// recent older backup that contains the same file, or dropped when there is
// no such backup. Every changed backup is saved in the local storage.
func (t ToGlacier) rearrangeStorage(id string) error {
	backups, err := t.Storage.List()
	if err != nil {
		return errors.WithStack(err)
	}

	// from the newest backup to the oldest one
	sort.Sort(backupsByCreationDate(backups))

	removedIndex := -1

	// modified files of the removed backup that still need a replacement
	var pending archive.Info

	// file name to the backup ID that can replace the removed reference
	fallbackFiles := make(map[string]string)

	// the first part of the walk locates the backup that will be removed and
	// collects the files to look for. The walk then continues over the older
	// backups, until every file got a replacement or the backups are over
	for i := 0; i < len(backups); i++ {
		if removedIndex != -1 && len(pending) == 0 {
			break
		}

		current := backups[i]
		if current.Backup.ID == id {
			removedIndex = i

			// only modified files matter: nothing can be done for a new file, an
			// unmodified file already references another backup, and a deleted
			// file will not appear in newer backups
			pending = current.Info.FilterByStatuses(archive.ItemInfoStatusModified)
			continue
		}

		if removedIndex == -1 {
			// still looking for the backup that will be removed
			continue
		}

		for filename, itemInfo := range current.Info {
			// only new and modified files really store the content
			if _, wanted := pending[filename]; wanted && itemInfo.Status.Useful() {
				fallbackFiles[filename] = current.Backup.ID
				delete(pending, filename)
			}
		}
	}

	// backups created after the removed one could reference it, so the
	// references are replaced, or removed when no replacement was found
	for i := removedIndex - 1; i >= 0; i-- {
		if !t.rearrangeArchiveInfo(id, backups[i].Info, fallbackFiles) {
			continue
		}

		if err = t.Storage.Save(backups[i]); err != nil {
			return errors.WithStack(err)
		}
	}

	return nil
}
// rearrangeArchiveInfo updates, in place, the items of archiveInfo that
// reference the backup id: the reference is replaced by the ID found in
// fallbackFiles for the same file name, or the item is removed when there is
// no fallback. It reports whether archiveInfo was changed.
func (t ToGlacier) rearrangeArchiveInfo(id string, archiveInfo archive.Info, fallbackFiles map[string]string) (modified bool) {
	for filename, itemInfo := range archiveInfo {
		if itemInfo.ID == id {
			modified = true

			replacementID, found := fallbackFiles[filename]
			if !found {
				// https://golang.org/ref/spec#For_range
				// removing a map entry during the iteration is safe: entries not
				// reached yet that are removed will not be produced
				delete(archiveInfo, filename)
				continue
			}

			// the status doesn't need to be verified: an item that references the
			// removed backup should have the unmodified or deleted statuses
			itemInfo.ID = replacementID
			archiveInfo[filename] = itemInfo
		}
	}

	return modified
}
// RemoveOldBackups deletes old backups from the cloud and from the local
// storage, keeping the keepBackups most recent ones. This optimizes the cloud
// space usage, as too old backups aren't used. An old backup is also kept when
// it is still referenced by a non-deleted item of one of the preserved
// backups, because incremental backups depend on it.
//
// A negative keepBackups is rejected with an error, as it would otherwise be
// used as a slice index. The list and remove durations, the removed backups
// and the errors are registered in a report that is added when the function
// returns.
func (t ToGlacier) RemoveOldBackups(keepBackups int) error {
	removeOldBackupsReport := report.NewRemoveOldBackups()
	defer func() {
		report.Add(removeOldBackupsReport)
	}()

	if keepBackups < 0 {
		err := errors.Errorf("toglacier: invalid number of backups to keep: %d", keepBackups)
		removeOldBackupsReport.Errors = append(removeOldBackupsReport.Errors, err)
		return err
	}

	timeMark := time.Now()
	backups, err := t.ListBackups(false)
	removeOldBackupsReport.Durations.List = time.Since(timeMark)

	if err != nil {
		removeOldBackupsReport.Errors = append(removeOldBackupsReport.Errors, err)
		return errors.WithStack(err)
	}

	// from the newest backup to the oldest one
	sort.Sort(backupsByCreationDate(backups))

	// with the incremental backup we cannot remove backups without checking the
	// archive info to identify partial backup entries
	var preserveBackups []string
	for i := 0; i < keepBackups && i < len(backups); i++ {
		for _, itemInfo := range backups[i].Info {
			if itemInfo.Status != archive.ItemInfoStatusDeleted {
				preserveBackups = append(preserveBackups, itemInfo.ID)
			}
		}
	}

	// sorted to allow the binary search below
	sort.Strings(preserveBackups)

	timeMark = time.Now()
	for i := keepBackups; i < len(backups); i++ {
		backupID := backups[i].Backup.ID

		// check if the backup isn't referenced by an active backup
		if j := sort.SearchStrings(preserveBackups, backupID); j < len(preserveBackups) && preserveBackups[j] == backupID {
			continue
		}

		removeOldBackupsReport.Backups = append(removeOldBackupsReport.Backups, backups[i].Backup)
		if err := t.RemoveBackups(backupID); err != nil {
			removeOldBackupsReport.Errors = append(removeOldBackupsReport.Errors, err)
			return errors.WithStack(err)
		}
	}
	removeOldBackupsReport.Durations.Remove = time.Since(timeMark)

	return nil
}
// SendReport sends the information from the actions performed by this tool
// via e-mail to an administrator. The report is built in the format defined
// in emailInfo.Format, which is also used as the message content type. Plain
// authentication is used only when both the username and the password are
// informed. It returns an error when the report cannot be built or when the
// sender fails to deliver the message.
func (t ToGlacier) SendReport(emailInfo EmailInfo) error {
	r, err := report.Build(emailInfo.Format)
	if err != nil {
		return errors.WithStack(err)
	}

	// the message is built explicitly, instead of using a multi-line literal,
	// so it doesn't depend on how the source file is formatted: header lines
	// end with CRLF and a blank line separates the headers from the body, as
	// expected by net/smtp
	headers := []string{
		fmt.Sprintf("From: %s", emailInfo.From),
		fmt.Sprintf("To: %s", strings.Join(emailInfo.To, ",")),
		"Subject: toglacier report",
		"MIME-Version: 1.0",
		fmt.Sprintf("Content-Type: %s; charset=utf-8", emailInfo.Format),
	}
	body := fmt.Sprintf("%s\r\n\r\n%s", strings.Join(headers, "\r\n"), r)

	var auth smtp.Auth
	if emailInfo.Username != "" && emailInfo.Password != "" {
		auth = smtp.PlainAuth("", emailInfo.Username, emailInfo.Password, emailInfo.Server)
	}

	addr := fmt.Sprintf("%s:%d", emailInfo.Server, emailInfo.Port)
	err = emailInfo.Sender.SendMail(addr, auth, emailInfo.From, emailInfo.To, []byte(body))
	return errors.WithStack(err)
}
// EmailInfo stores all necessary information to send an e-mail.
type EmailInfo struct {
// Sender delivers the message; SendReport calls its SendMail method.
Sender EmailSender
// Server is the e-mail server host. It is joined with Port to build the
// address given to Sender, and it is also the host used for the plain
// authentication.
Server string
// Port is the e-mail server port.
Port int
// Username is used for the plain authentication, which is only enabled when
// both Username and Password are not empty.
Username string
// Password is used together with Username for the plain authentication.
Password string
// From is the sender address, written in the From header and given to Sender.
From string
// To lists the recipients, written in the To header and given to Sender.
To []string
// Format is the report format given to report.Build, and it is also written
// as the message content type.
Format report.Format
}
// EmailSender is the e-mail API used to send the report. Its single method
// has the same parameters as the smtp.SendMail function, making it easy to
// replace the real delivery with a mock.
type EmailSender interface {
// SendMail sends the message msg from the address from to the addresses in
// to, using the server at addr and the authentication a.
SendMail(addr string, a smtp.Auth, from string, to []string, msg []byte) error
}
// EmailSenderFunc is an adapter that allows an ordinary function with the
// appropriate signature to be used as an EmailSender.
type EmailSenderFunc func(addr string, a smtp.Auth, from string, to []string, msg []byte) error

// SendMail sends the e-mail by calling the underlying function with the same
// arguments, returning whatever it returns.
func (fn EmailSenderFunc) SendMail(addr string, a smtp.Auth, from string, to []string, msg []byte) error {
	err := fn(addr, a, from, to, msg)
	return err
}
// backupsByCreationDate sorts the backups by creation date in reverse order,
// so the newest backup is the first one.
type backupsByCreationDate storage.Backups

// Len returns how many backups there are.
func (b backupsByCreationDate) Len() int {
	return len(b)
}

// Less reports whether the backup at position i must be placed before the one
// at position j, which happens when it was created after it.
func (b backupsByCreationDate) Less(i, j int) bool {
	left, right := b[i].Backup.CreatedAt, b[j].Backup.CreatedAt
	return left.After(right)
}

// Swap exchanges the backups at positions i and j.
func (b backupsByCreationDate) Swap(i, j int) {
	tmp := b[i]
	b[i] = b[j]
	b[j] = tmp
}