// Frost.hpp
// We need the ClassPath's streams
#include "ClassPath/include/Streams/Streams.hpp"
// We need files too
#include "ClassPath/include/File/File.hpp"
// We need strings
#include "ClassPath/include/Strings/Strings.hpp"
// And hash tables too
#include "ClassPath/include/Hash/HashTable.hpp"
#include "ClassPath/include/Hash/RobinHoodHashTable.hpp"
// We need crypto code too for the key stuff
#include "ClassPath/include/Crypto/OpenSSLWrap.hpp"
#define DEFAULT_INDEX "index.frost"
#define PROTOCOL_VERSION "2.0"
// Forward declare some classes we'll be using, to avoid including a lot of stuff in the header
namespace File { class Chunk; class MultiChunk; }
namespace Frost
{
/** The kind of memory block we are using */
typedef Utils::MemoryBlock MemoryBlock;
/** The kind of String class we are using too */
typedef Strings::FastString String;
/** This class is used to build session keys out of the given user's private key.
Symmetric encryption is used in CTR block mode,
where the nonce is derived from SHA256(Multichunk) ^ counter.
The key used for the encryption is derived from the asymmetric encryption
algorithm.
It's updated at pseudo-regular intervals, and synchronization points are used
to figure out whether the next block is a salt used to update the key or a ciphertext.
Typically, a key is built like this:
@verbatim
// random Salt (256 bits) is generated, || means concatenation.
key = KDF(Salt || MasterKey)
cipheredChunk = Salt
for each encryption block in the multichunk:
nonce = SHA256(Multichunk) ^ counter
cipheredChunk = cipheredChunk || AES_CTR_enc(key, Multichunk, nonce)
// The code below is not implemented yet
// if length(cipheredData) > securityThreshold:
// create new key (see above)
// cipheredChunk = cipheredChunk || newSalt
@endverbatim
For decrypting, the algorithm runs in reverse:
@verbatim
Salt = ciphertext[0..256 bits]
key = KDF(Salt || MasterKey)
for each encrypted block in the multichunk:
nonce = SHA256(Multichunk) ^ counter
clearText = AES_CTR_dec(key, cipherText[0..256], nonce)
@endverbatim
*/
class KeyFactory
{
// Type definition and enumerations
public:
/** The cryptographic primitive we use for asymmetric encryption */
typedef Crypto::OSSL_ECIES<NID_secp224k1> AsymmetricT;
/** The cryptographic primitive we use for symmetric encryption */
typedef Crypto::OSSL_AES SymmetricT;
/** The cryptographic primitive we use for large dataset hashing */
typedef Crypto::OSSL_SHA256 BigHashT;
/** The private key for asymmetric */
typedef AsymmetricT::PrivateKey AsymPrivKeyT;
/** The public key for asymmetric */
typedef AsymmetricT::PublicKey AsymPubKeyT;
/** The Key derivation function to use */
typedef Hashing::KDF1<256, 256, BigHashT> KeyDerivFuncT;
/** The Key derivation function to use for password */
typedef Hashing::PBKDF1<256, 256, BigHashT> PWKeyDerivFuncT;
/** The master symmetric key */
typedef uint8 KeyT[BigHashT::DigestSize];
// Members
private:
/** The master key that's used while the system is running */
KeyT masterKey;
/** The current salt */
KeyT salt;
/** The current counter */
uint32 counter;
/** The current opaque nonce */
KeyT hashChunkNonce;
// Interface
public:
/** Load the session key out of the given key vault.
@param fileVault A path to the file vault to open.
@param cipherMasterKey The ciphered master key that's read from the index database.
@param password The password used to protect the private key
@param ID The key's ID
@return An empty string on success, or the error message on failure
(for example, if the file does not exist,
if its mode does not match the expected mode (0600),
or if the file format is not recognized) */
String loadPrivateKey(const String & fileVault, const MemoryBlock & cipherMasterKey, const String & password = "", const String & ID = "");
/** Create the file vault if it does not exist (the complete directory hierarchy is created
for this file), store the newly created private key protected by the given password,
and generate a master key to be used for this session.
@param cipherMasterKey On output, contains the master key that was encrypted with the
generated private key. This should be saved in the index database.
@param fileVault A path to the file vault to create.
@param password The password to use to protect the private key
@param ID The key's ID
@return An empty string on success, or the error message on failure
(for example, if the file already exists) */
String createMasterKeyForFileVault(MemoryBlock & cipherMasterKey, const String & fileVault, const String & password = "", const String & ID = "");
/** Increment the counter and get the current key.
This must be called before any 'AES_CTR()' call in the algorithm described above */
void incrementNonce(KeyT & keyOut)
{
counter++;
// Check if all parameters are aligned, and if so, process 4 by 4
if (((uint64)(&keyOut[0]) & 0x3) == 0)
{
for (uint32 i = 0; i < ArrSz(salt); i+= sizeof(counter))
*(uint32*)&keyOut[i] = *(uint32*)&hashChunkNonce[i] ^ counter;
return;
}
// Avoid unaligned memory access per loop turn
uint8 cnt[4] = { (uint8)(counter >> 24), (uint8)((counter >> 16) & 0xFF), (uint8)((counter >> 8) & 0xFF), (uint8)(counter & 0xFF) };
for (uint32 i = 0; i < ArrSz(salt); i+= sizeof(counter))
{
keyOut[i+0] = hashChunkNonce[i+0] ^ cnt[0];
keyOut[i+1] = hashChunkNonce[i+1] ^ cnt[1];
keyOut[i+2] = hashChunkNonce[i+2] ^ cnt[2];
keyOut[i+3] = hashChunkNonce[i+3] ^ cnt[3];
}
}
/** Create a new nonce and reset the counter.
This creates the 'nonce' in the algorithm described above */
void createNewNonce(const KeyT & hash) { counter = 0; memcpy(hashChunkNonce, hash, sizeof(hash)); }
/** Create a new key (and a salt).
This creates the 'key' in the algorithm described above */
void createNewKey(KeyT & keyOut)
{
Random::fillBlock(salt, ArrSz(salt));
// Hash the random block to prevent state-guessing attacks, so no data in the output comes directly from the random generator.
BigHashT hash; hash.Start(); hash.Hash(salt, ArrSz(salt)); hash.Finalize(salt);
deriveNewKey(keyOut);
}
/** Get the salt */
void getCurrentSalt(KeyT & outSalt) const { memcpy(outSalt, salt, ArrSz(salt)); }
/** Set the current salt (extracted from the ciphertext) */
void setCurrentSalt(const KeyT & inSalt) { memcpy(salt, inSalt, ArrSz(salt)); }
/** Derive the key out of the current salt */
void deriveNewKey(KeyT & keyOut) const
{
KeyDerivFuncT kdf;
kdf.Hash(masterKey, ArrSz(masterKey));
kdf.finalizeWithExtraInfo(keyOut, salt, (uint32)ArrSz(salt));
}
};
/** Get the key factory singleton */
inline KeyFactory & getKeyFactory() { static KeyFactory factory; return factory; }
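// A minimal usage sketch of the key/nonce scheme documented above (illustration only, not part
// of the original API): it assumes loadPrivateKey() or createMasterKeyForFileVault() already
// succeeded so the master key is set, and that `multichunkHash` holds the SHA256 of the
// multichunk. The function name is hypothetical.
#if 0 // documentation sketch, not compiled
inline void exampleDeriveBlockNonces(const KeyFactory::KeyT & multichunkHash)
{
    KeyFactory & factory = getKeyFactory();
    KeyFactory::KeyT key, nonce;
    factory.createNewKey(key);               // Picks a fresh salt and derives key = KDF(Salt || MasterKey)
    factory.createNewNonce(multichunkHash);  // Resets the counter; the nonce base is SHA256(Multichunk)
    for (int block = 0; block < 4; block++)
    {
        factory.incrementNonce(nonce);       // nonce = SHA256(Multichunk) ^ counter for this block
        // ... feed `key` and `nonce` to the AES-CTR primitive for this encryption block ...
    }
}
#endif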
/** The progress callback that's called regularly by the backup / restoring process */
struct ProgressCallback
{
enum Action { Backup = 0, Restore, Purge };
enum FlushMode { FlushLine = 0, KeepLine, EraseLine };
String getActionName(const Action action) { const char * actions[] = {"Backup", "Restore", "Purge"}; return actions[action]; }
/** This method is called while an operation is running.
The protocol for sizeDone, totalSize, index and count is as follows:
1) Each time a new entry is processed, index must change (it usually increases). The sizeDone value is set to 0, and totalSize is set to a non-zero value.
The callback should not validate the line here if it outputs to the console/database.
2) While the entry is being processed, currentFilename and index will not change, but sizeDone and totalSize will likely change.
The callback should still not validate the line here.
3) When the entry is done processing, sizeDone and totalSize will be equal.
The callback can validate the line here.
If sizeDone, totalSize, index, and count are all zero, the line is not validated, the action is ignored, and only currentFilename is relevant for output.
@return false to interrupt the process */
virtual bool progressed(const Action action, const String & currentFilename, const uint64 sizeDone, const uint64 totalSize, const uint32 index, const uint32 count, const FlushMode mode) = 0;
/** This method is called when the processing must warn the user.
@return false to interrupt the process */
virtual bool warn(const Action action, const String & currentFilename, const String & message, const uint32 sourceLine = 0) { return true; }
#define WARN_CB(action, file, msg) callback.warn(action, file, msg, __LINE__)
virtual ~ProgressCallback() {}
};
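// A minimal sketch of a console-only ProgressCallback implementation following the validation
// protocol documented above (illustration only; it assumes <cstdio> is available and ignores
// the FlushMode hint, which a real implementation would honor).
#if 0 // documentation sketch, not compiled
struct ConsoleProgress : public ProgressCallback
{
    virtual bool progressed(const Action action, const String & currentFilename, const uint64 sizeDone,
                            const uint64 totalSize, const uint32 index, const uint32 count, const FlushMode mode)
    {
        // Only validate (keep) the line once the entry is fully processed (step 3 of the protocol)
        const bool done = totalSize && sizeDone == totalSize;
        fprintf(stdout, "[%s] %s (%u/%u)%s", (const char*)getActionName(action), (const char*)currentFilename,
                (unsigned)index, (unsigned)count, done ? "\n" : "\r");
        return true; // Return false to interrupt the process
    }
    virtual bool warn(const Action action, const String & currentFilename, const String & message, const uint32 sourceLine)
    {
        fprintf(stderr, "WARNING %s: %s (line %u)\n", (const char*)currentFilename, (const char*)message, (unsigned)sourceLine);
        return true;
    }
};
#endif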
/** The purge strategy.
@sa purgeBackup */
enum PurgeStrategy
{
Fast = 100, //!< The fast strategy for pruning old backups; it's not space efficient.
FindLostChunk = Fast, //!< Find lost chunks and remove them from the database index. If a multichunk contains only garbage-collected chunks, it's deleted.
Slow = 0, //!< The slow strategy optimizes for space, but it's not compute efficient.
MergeMultiChunk = Slow, //!< Find lost chunks and remove them from the database index. Recreate complete multichunks out of the remaining ones,
//!< downloading them, removing the useless chunks from them, and uploading the complete multichunks again.
};
/** The overwrite strategy.
@sa restoreBackup */
enum OverwritePolicy
{
No = 0, //!< Neither overwrite nor deletion allowed
Yes = 1, //!< Overwrite and deletion allowed
Update = 2, //!< Overwrite allowed if the new item is newer than the one on the filesystem
};
/** Some useful methods to convert internal checksums to and from hexadecimal */
namespace Helpers
{
/** The allowed compressors */
enum CompressorToUse
{
None = 0, //!< No compression done to this multichunk
ZLib = 1, //!< Using ZLib compression
BSC = 2, //!< Using BSC compression
// No other compressor supported
Default = -1, //!< When not specified, this will select the global 'compressor' value
};
/** Convert a small binary blob to a string. */
String fromBinary(const uint8 * data, const uint32 size, const bool base85 = true);
/** Convert a string back to the binary blob. */
bool toBinary(const String & src, uint8 * data, uint32 & size, const bool base85 = true);
/** Encrypt a given block with AES counter mode.
@warning Beware that this uses the current key factory to figure out the current key and nonce */
bool AESCounterEncrypt(const KeyFactory::KeyT & nonceRandom, const ::Stream::InputStream & input, ::Stream::OutputStream & output);
/** Decrypt a given block with AES counter mode.
@warning Beware that this uses the current key factory to figure out the current key and nonce */
bool AESCounterDecrypt(const KeyFactory::KeyT & nonceRandom, const ::Stream::InputStream & input, ::Stream::OutputStream & output);
/** Encrypt or decrypt using AES counter mode.
This does not use the current key factory and thus is more verbose to use.
This uses a simple scheme for nonce+counter concatenation (unlike AESCounterEncrypt)
@param key The encryption/decryption key
@param nonceRandom The nonce random value used (only the first 8 bytes are used)
@param input The input stream
@param output The output stream
@param inputHash If provided, this will compute the hash of the input data and store it there
@param outputHash If provided, this will compute the hash of the output data and store it there (only one is expected) */
bool AESCounterProcess(const KeyFactory::KeyT & key, const KeyFactory::KeyT & nonceRandom, const ::Stream::InputStream & input, ::Stream::OutputStream & output, ProgressCallback & callback, uint8 * inputHash = 0, uint8 * outputHash = 0);
/** Ensure the index file is available, or recreate it if not.
@return empty string on success, or the error message */
String ensureValidIndexFile(const String & encryptedIndexPath, const String & localIndexPath, const KeyFactory::KeyT & key, ProgressCallback & callback, const bool forceDecryption = false);
/** Close a currently filled multichunk and save in database and filesystem */
// bool closeMultiChunk(const String & basePath, File::MultiChunk & multiChunk, uint64 multichunkListID, uint64 * totalOutSize, ProgressCallback & callback, uint64 & previousMultichunkID, CompressorToUse actualComp = Default);
/** Extract a chunk out of a multichunk */
File::Chunk * extractChunk(String & error, const String & basePath, const String & MultiChunkPath, const uint64 MultiChunkID, const size_t chunkOffset, const String & chunkChecksum, const String & filterMode, File::MultiChunk * cache, ProgressCallback & callback);
/** Allocate a chunk list ID */
unsigned int allocateChunkList();
}
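// A small sketch of the expected round trip between a binary checksum and its textual form
// using the helpers above (illustration only; the 32-byte buffer size matches a SHA256 digest
// and the function name is hypothetical).
#if 0 // documentation sketch, not compiled
inline bool exampleChecksumRoundTrip(const uint8 (&checksum)[32])
{
    String text = Helpers::fromBinary(checksum, 32);    // Base85 encoding by default
    uint8 back[32]; uint32 size = (uint32)sizeof(back);
    if (!Helpers::toBinary(text, back, size)) return false;
    return size == 32 && memcmp(back, checksum, 32) == 0;
}
#endif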
/** The archive format used in Frost v2.
Using a database for the index seems like a good idea, but it does not scale very well when the number of entries
is large, even when the database's indexes are created correctly.
This is because the database file itself becomes large and a lot of swapping is done to load the pages containing the indexes.
During both the backup and restore processes, the index file is not mutated, leading only to an increase in size (backup only).
During purging, the file is mutated. In order to guarantee atomicity of the slow-purge process without any data loss, the index file
is rewritten to a new file, and only when this file is complete is it swapped with the previous file, which is then deleted.
If you are synchronizing the index file remotely, this means that slow purging will require sending a whole new file again,
while using the default system only requires sending the modified part (with rsync, this is done automatically).
So, in order to speed up our backup process, I'm changing the system from a DB to a binary format.
To understand this design choice: on a medium-sized dataset (250GB), the number of chunks is around 12e6.
If each chunk is 20 bytes (SHA-1) + size (2 bytes) + ID (4 bytes), then this takes 312MB in memory (or in a file).
When backing up, restoring and purging, fast O(log N) access to this array is required, and this imposes that the chunk cache
stays in memory, else it will be too slow.
Currently, because of the "append"-like structure we'd like to keep for the index, we have to consolidate multiple chunk cache
fragments into a large one. Because a single chunk is never stored twice, we don't need to write the complete cache
to the index file again, as long as we write the new items from the cache in a new fragment.
This means that we'll have to hold two versions in memory while backing up and purging: the complete cache, and the "new items"
cache.
Concerning multichunks, the same process applies, but since the number of multichunks is much lower, it's not an algorithmic issue.
We also store chunk list fragments (an array of chunk IDs) identified by a UID for each multichunk, and for each file.
A typical backup folder is around 100k files, and a chunk list's UID is 4 bytes, so it's negligible.
The file tree is also stored as an array, but unlike the database format, we store the complete tree for each revision to avoid
having to consolidate the file tree.
So, for a backup tree of 100k files, each file consumes:
- 4 bytes for the UID
- 4 bytes for the parent UID
- 4 bytes for the chunk list's UID
- ~60 bytes (average) for the file's metadata
- ~22 bytes (average) for the file's base name
= 94 bytes per file on average => 9.4MB of data for 100k files
Each file's record is variable size, because we want to avoid storing a useless "length" field.
The metadata size is known when parsing starts on the metadata field, so it does not require any length storage.
The file's name is zero-terminated. The last entry in a file tree block might get padded with zeros so it ends on a 32-bit boundary.
Like many archive formats, we expect to be able to seek anywhere in the archive (unlike tar, but more like zip) in O(1), so here's
the idea for the implementation:
# FILE HEADER
The file header contains the magic number ('Frst' or 0x46727374), followed by a 32-bit version/state (currently 0x3),
then a 32-bit offset to the main catalog object, in 4-byte units (this limits the index's size to 16GB maximum).
The ciphered master key follows (108 bytes).
If the catalog offset is 0, then a full catalog is expected to be found at the end of the file. This shortcut allows the
file header to remain unmodified on backup, so only the index file grows.
# DATA HEADER
Each data block starts with a header containing 3 bits for the data block type, followed by 29 bits for the data block size in 4-byte units.
See below for a description of the possible data block types.
All data blocks are aligned on a 32-bit boundary in the file.
# CATALOG
The catalog is a special data block object with type 'B' that contains the offsets of the other (main/valid) data blocks in the file.
Since no modification to the index is done during backup (except for adding data to the index), this is the only block that can be
mutated (but this is not required, and the default implementation does not do it).
The catalog contains a reference to the previous version of the catalog (from a previous backup run), the position of the
current file tree object, plus the positions of the chunk and multichunk index blocks.
Data blocks
# Chunks blocks - type 'C'
Chunks blocks also have an additional 32-bit header that stores the revision they appeared in.
Chunks are indexed by their checksum. Since indexes are required for all operations (backup, restore and purge), a
consolidation step that merges all the index blocks is done when Frost starts. This means that the current array mapping
chunk checksums is rebuilt at Frost's start time.
For backup, there is only one Chunks block per revision (the block of new chunks is directly dumped to the file via mmap and memcpy).
When purging fast, the file tree starting at the kept revision is analyzed to find out if it needs chunks from the previous revisions.
If it does, then a new chunks block is created with the kept revision - 1 and the chunks are copied to this block.
Then the previous chunks are punched out (on systems supporting the punch-hole system call), so they don't use file space anymore.
The end-user result is that after fast purging, the index file size actually increases but the size on storage actually decreases (via the
punch-hole feature).
Useless multichunks are deleted too, but that's not part of this file format process.
# Chunk lists blocks - type 'L'
Chunk list blocks have a 32-bit header containing the chunk list's UID.
It's then followed by a linear array of chunk IDs. Offsets are not saved because the array is kept in order.
There is one chunk list per file, and one per multichunk.
# Multichunks - type 'M'
Multichunks have a 16-bit header that stores the multichunk ID (only 65536 multichunks can be used per backup).
Then follows their linked chunk list ID (32 bits), and the filter argument conditions (16 bits for the filter argument's index
in the filter arguments block).
The last field is a 32-byte string holding the SHA256 of the multichunk (this is used to find the multichunk in the backup directory).
# Filter argument list block - type 'A'
A zero-byte terminated string array (delimited by '\n') containing the list of filter arguments.
# File tree block - type 'F'
A file tree block has a 32-bit header that stores the revision number for this file tree.
Then follows a linked list of file tree items (see above) (around 100 bytes per file). Each item is separated by a zero byte
after the file's base name.
# Metadata block entry - type 'M'
There's usually only one such block in the file.
This block exists for user-based information, such as a description of the backup set or the initial backup path.
Additional information is possible here.
# Catalog block entry - type 'B'
A catalog block entry has a 32-bit header containing the revision number of the backup.
A 32-bit offset (in 4-byte units) follows, storing the offset to the previous revision's catalog position.
It then stores the offset to the current revision's chunks block as a 32-bit number.
The offset to the multichunks follows (as a 32-bit number).
Then the offset to the current revision's file tree follows (as a 32-bit number).
Optionally, the offset to the filter argument list block can follow (as a 32-bit number),
followed by the offset to the metadata block entry (as a 32-bit number).
@note Archive integers are stored in native endianness (typically little-endian) so they can be memory mapped directly.
This means that you cannot back up a system with a given endianness and restore on a system with a different endianness.
Hopefully, this is usually not required.
When using safe index mode, the index file is encrypted and stored in the remote folder (so it's safe to store on a hostile server).
The local cleartext index is not deleted, and is used as a cache for subsequent operations.
The cache is used if no encrypted version is found, or if and only if:
- It is present
- Its modification time is the same as the ciphered index file's modification time
- Its size is equal to the size of the ciphered index file minus the header size (48 bytes)
- (skipped by default) Its hash matches the one written in the header of the ciphered index file
* Notice that since the cache is on your system, it's very unlikely to be modified by an attacker, and computing the hash of
such a huge file is likely to take a lot of time each time, so it's not checked by default.
If the cache file is not found, it's decrypted from the encrypted index file and its hash is also computed and checked.
The ciphered index file is named "index.frost.aes" and contains a header like this:
- 32-bit magic number ('FtEi' 0x46744569)
- 32-bit version/state (currently 0x3)
- 256-bit SHA256 hash of the unencrypted index file
- 64 bits of random salt used as the nonce in AES256 CTR mode; the counter starts at 0
*/
namespace FileFormat
{
/** Check if a memory block is zero */
template <size_t N>
inline bool isZero(const uint8 (&a)[N]) { for (size_t i = 0; i < N; i++) if (a[i]) return false; return true; }
// Starting from now, everything is typically packed
#pragma pack(push, 1)
/** The ciphered index header (see above, new in v3) */
struct CipheredIndexHeader
{
/** The magic number */
union { uint32 number; char text[4]; } magic;
/** The version number (should be 3) */
uint32 version;
/** The hash of the cleartext index file */
uint8 hash[32];
/** The random nonce used */
uint8 nonce[8];
/** Check if this header is valid */
bool isValid() const { return memcmp(magic.text, "FtEi", 4) == 0 && version == 3; }
CipheredIndexHeader() : version(3) { memcpy(magic.text, "FtEi", 4); memset(hash, 0, ArrSz(hash)); memset(nonce, 0, ArrSz(nonce)); }
};
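// A sketch of validating the "index.frost.aes" header described above before attempting any
// decryption (illustration only; `fileStart`/`fileSize` are assumed to come from the caller).
#if 0 // documentation sketch, not compiled
inline bool exampleCheckCipheredIndex(const uint8 * fileStart, const uint64 fileSize)
{
    if (fileSize < sizeof(CipheredIndexHeader)) return false;
    CipheredIndexHeader header;
    memcpy(&header, fileStart, sizeof(header));
    return header.isValid();    // Checks the 'FtEi' magic and version 3
}
#endif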
/** The file's offset used */
struct Offset
{
uint32 offset;
/** Helper function to convert from file storage to real offset */
inline uint64 fileOffset() const { return offset * 4ULL; };
/** Helper function to convert from real offset to file storage */
inline void fileOffset(const uint64 off) { Assert((off & 3) == 0 && "Offset must be aligned on 4 bytes"); offset = (uint32)(off >> 2ULL); }
/** Default construction */
Offset(uint64 offset = 0) { fileOffset(offset); }
};
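// A tiny sketch of the 4-byte-unit offset encoding used throughout the format (illustration only).
#if 0 // documentation sketch, not compiled
inline void exampleOffsetEncoding()
{
    Offset off;
    off.fileOffset(1024);             // Stored internally as 256 (1024 / 4)
    uint64 real = off.fileOffset();   // Converts back to 1024
}
#endif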
/** The data block header */
struct DataHeader
{
union {
struct {
/** The data type (check the Type enumeration for the actual meaning) */
uint32 type : 3;
/** The block size in 4-byte units (up to 2GB) */
uint32 blockSize : 29;
};
/** The data block type and size */
uint32 typeAndSize;
};
/** The possible block type */
enum Type
{
Catalog = 0, //!< A Catalog block type
Chunk = 1, //!< The chunks block
ChunkList = 2, //!< The list of chunks
Multichunk = 3, //!< The multichunks block
FilterArgument = 4, //!< The filters argument list
FileTree = 5, //!< The file tree
Metadata = 6, //!< A metadata block
Extended = 7, //!< Extended block type (the next extra word contains the type)
};
/** Check if this header is correct */
bool isCorrect(const int64 fileSize, const uint64 fileOffset) const { return blockSize * 4 + fileOffset <= fileSize; }
/** Get the size for this block (not the header size, use sizeof(DataHeader) to get it) */
uint64 getSize() const { return blockSize * 4; }
/** Set the size for this block (not the header size) obviously */
void setSize(const uint64 s) { blockSize = (s+3)/4; }
/** Dump the header (for debugging purpose only) */
String dump() const
{
/** The type-to-name lookup table */
static const char * TypeToName[] = { "Catalog", "Chunk", "ChunkList", "Multichunk", "FilterArgument", "FileTree", "Metadata", "Extended" };
return String::Print("[t:%s,s:%llu]", TypeToName[type], getSize());
}
/** Default construction */
DataHeader(Type type = Catalog, const uint32 size = 0) : type(type), blockSize(size) {}
};
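// A short sketch of how a DataHeader is built and how its 4-byte-unit size rounding behaves
// (illustration only).
#if 0 // documentation sketch, not compiled
inline void exampleDataHeader()
{
    DataHeader h(DataHeader::ChunkList);
    h.setSize(10);                  // Rounded up to the next 4-byte boundary...
    uint64 s = h.getSize();         // ...so this yields 12
    String text = h.dump();         // "[t:ChunkList,s:12]"
}
#endif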
/** The catalog data block */
struct Catalog
{
/** The block header */
DataHeader header;
/** The revision number */
uint32 revision;
/** The revision's time (in seconds since Epoch) */
uint32 time;
/** Previous catalog offset */
Offset previous;
/** The chunks's block offset */
Offset chunks;
/** The chunk lists' chained list offset */
Offset chunkLists;
/** The number of chunk lists at the given offset (they follow each other) */
uint32 chunkListsCount;
/** The multichunks block offset */
Offset multichunks;
/** The number of multichunks at the given offset (they follow each other) */
uint32 multichunksCount;
/** The file tree block offset */
Offset fileTree;
/** Optional filter argument list */
Offset optionFilterArg;
/** Optional metadata block */
Offset optionMetadata;
/** Check if this block is correct */
bool isCorrect(const int64 fileSize, const uint64 fileOffset) const
{
return header.isCorrect(fileSize, fileOffset) // Does header fit ?
&& (fileSize >= fileOffset + getSize()) // Do we completely fit without optional fields ?
&& (previous.fileOffset() <= fileSize) // Does the previous block offset fit ?
&& (chunks.fileOffset() <= fileSize) // Does the chunks block offset fit ?
&& (chunkLists.fileOffset() <= fileSize) // Do the chunk lists fit ?
&& (multichunks.fileOffset() <= fileSize) // Does the multichunk block offset fit ?
&& (fileTree.fileOffset() <= fileSize) // Does the file tree offset fit ?
&& (optionFilterArg.fileOffset() <= fileSize) // Does the optional filter argument fit ?
&& (optionMetadata.fileOffset() <= fileSize); // Does the optional metadata offset fit ?
}
/** Get the block expected size */
static uint64 getSize() { return sizeof(Catalog); }
/** Load the structure from the given memory pointer */
bool load(const uint8 * ptr, const uint64 size)
{
if (size <= sizeof(header)) return false; // Check if we can read the header
memcpy(this, ptr, sizeof(header));
if (size < header.getSize()) return false; // Check if the size is coherent with what we expect
memcpy(this, ptr, header.getSize());
return true;
}
/** Write the structure to the given memory pointer */
void write(uint8 * ptr) { header.setSize(getSize()); memcpy(ptr, this, header.getSize()); }
/** Dump the catalog offset (for debugging purpose) */
String dump() const
{
String ret = header.dump();
ret += String::Print(" Catalog rev%u %s\n", revision, (const char*)Time::Time(time).toDate());
ret += String::Print(" Off prev: %llu\n", previous.fileOffset());
ret += String::Print(" Off chunk: %llu\n", chunks.fileOffset());
ret += String::Print(" Off chunklist: %llu (%u lists)\n", chunkLists.fileOffset(), chunkListsCount);
ret += String::Print(" Off mchunk: %llu (%u mchunks)\n", multichunks.fileOffset(), multichunksCount);
ret += String::Print(" Off filetree: %llu\n", fileTree.fileOffset());
ret += String::Print(" Off filterArg: %llu\n", optionFilterArg.fileOffset());
ret += String::Print(" Off metadata: %llu\n", optionMetadata.fileOffset());
return ret;
}
/** Default constructor */
Catalog(const uint32 revision = 0)
: header(DataHeader::Catalog), revision(revision), time(::Time::Time::Now().Second()), chunkListsCount(0), multichunksCount(0) {}
};
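// A sketch of walking the catalog chain backwards from a memory-mapped index file (illustration
// only; `map` and `fileSize` are assumed to come from the caller, and `catalogOffset` from the
// file header's catalog offset field).
#if 0 // documentation sketch, not compiled
inline void exampleWalkCatalogs(const uint8 * map, const uint64 fileSize, uint64 catalogOffset)
{
    while (catalogOffset)
    {
        Catalog cat;
        if (!cat.load(map + catalogOffset, fileSize - catalogOffset)) break;
        if (!cat.isCorrect((int64)fileSize, catalogOffset)) break;
        // ... use cat.chunks, cat.fileTree, etc. for this revision ...
        catalogOffset = cat.previous.fileOffset();   // 0 means there is no previous revision
    }
}
#endif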
/** The internal chunk item */
struct Chunk
{
/** The chunk's checksum */
uint8 checksum[20];
/** The chunk's size */
uint16 size;
/** The multichunk that's holding this chunk (up to 65536 possible multichunks) */
uint16 multichunkID;
/** The chunk unique identifier */
uint32 UID;
// These operators are required for sorting them
/** If we have a UID, use it for fast comparison, else use the checksum */
bool operator == (const Chunk & k) const { return UID && k.UID ? UID == k.UID : size == k.size && memcmp(checksum, k.checksum, ArrSz(checksum)) == 0; }
bool operator <= (const Chunk & k) const { return (size < k.size) || (size == k.size && memcmp(checksum, k.checksum, ArrSz(checksum)) <= 0); }
bool operator != (const Chunk & k) const { return !(*this == k); }
/** This is required for the array's sorting functions (we don't care about UID here) */
static int compareData(const Chunk & a, const Chunk & b) { if (a.size < b.size) return -1; if (a.size > b.size) return 1; return memcmp(a.checksum, b.checksum, ArrSz(a.checksum)); }
Chunk(const uint32 UID = 0) : UID(UID), size(0), multichunkID(0) { memset(this, 0, sizeof(Chunk) - sizeof(UID)); }
Chunk(const uint8 chksum[20], const uint16 size) : size(size), multichunkID(0), UID(0) { memcpy(checksum, chksum, 20); }
};
/** This is used to search and sort the chunk array by UID for restoring, purging mainly */
struct ChunkUIDSorter
{
/** This is required for the array's sorting functions (we only care about UID here) */
static int compareData(const Chunk & a, const Chunk & b) { return a.UID < b.UID ? -1 : (a.UID == b.UID ? 0 : 1); }
};
/** Make the checksum type a real type, else it fails to compile with the automatic conversion */
struct ChecksumType
{
uint8 checksum[20];
operator uint8 * () { return &checksum[0]; }
operator const uint8 * () const { return &checksum[0]; }
ChecksumType(const uint8 (&CS)[20]) { memcpy(checksum, CS, ArrSz(checksum)); }
ChecksumType(const uint8 * CS) { memcpy(checksum, CS, ArrSz(checksum)); }
ChecksumType() { memset(checksum, 0, ArrSz(checksum)); }
};
/** Implementation of the hashing policy for checksum keys.
Since a checksum is already uniformly distributed, the first 32 bits of the checksum
are used directly as the hash value (0 is reserved for "not computed yet"). */
struct IntegerHashingPolicyForChecksum
{
/** The type for the hashed key */
typedef uint32 HashKeyT;
/** Allow compile-time testing of the default value */
enum { DefaultAreZero = 1 };
/** Check if the initial keys are equal */
static bool isEqual(const ChecksumType key1, const ChecksumType key2) { return memcmp(key1, key2, sizeof(ChecksumType)) == 0; }
/** Compute the hash value for the given input */
static inline HashKeyT Hash(const ChecksumType x) { HashKeyT a; memcpy(&a, x, sizeof(a)); return !a ? 1 : a; } // No need to hash a checksum but 0 is forbidden here...
/** Get the default hash value (this is stored by default in the buckets); it's the value that means the hash is not computed yet. */
static inline HashKeyT defaultHash() { return 0; }
};
/** The hash table's linked bucket type.
This implementation is not the fastest possible because fetching the hash and key misses the cache, yet
it's optimized for constrained memory.
The idea is that the Chunk array no longer has to be sorted by checksum (which is slow to append to and to search).
Instead, we don't sort the chunk array anymore, and use this hash table to map checksums to indexes in the chunk array.
Because the hash table has to fit in memory, it has to be as compact as possible.
So, we only store the index in the bucket, and recompute the hash & key whenever required.
Fortunately, there is no computation required for Chunks, so it should be quite fast (except for a cache miss on each test) */
struct SmallBucket
{
/** The hash key type we need */
typedef uint32 HashKeyT;
typedef ChecksumType Key;
/** This bucket data */
uint32 data;
/** This bucket hash */
uint32 hash;
/** Get the hash for this bucket */
inline HashKeyT getHash(void * opaque) const { return hash; }
/** Get the key for this bucket */
inline uint8* getKey(void * opaque) const { static ChecksumType k; const uint8 * cs = getChunk(opaque); if (!cs) memset(&k, 0, sizeof (k)); else memcpy(&k, cs, sizeof(k)); return k; }
/** Set the hash for this bucket */
inline void setHash(const HashKeyT h, void *) { hash = h; } // No op in our case
/** Set the key for this bucket */
inline void setKey(Key k, void *) { }
/** Reset the key to the default value */
inline void resetKey(void *) { }
/** Swap the content of this bucket with the given parameters.
@return On output, this key is set to k, this hash is set to h and data is set to value, and k is set to the previous key, h to the previous hash, value to the previous data */
inline void swapBucket(Key k, HashKeyT & h, uint32 & value)
{
HashKeyT tmpH = hash; hash = h; h = tmpH;
uint32 tmpD = data; data = value; value = tmpD;
}
/** Swap the bucket with another one */
inline void swapBucket(SmallBucket & o)
{
uint32 tmpD = data; data = o.data; o.data = tmpD;
HashKeyT tmpH = hash; hash = o.hash; o.hash = tmpH;
}
SmallBucket() : data(0), hash(0) {}
private:
/** Get the Chunk for the given index */
inline const uint8 * getChunk(void * opaque) const { Container::PlainOldData<Chunk>::Array * array = (Container::PlainOldData<Chunk>::Array *)opaque; return data < array->getSize() ? array->getElementAtUncheckedPosition(data).checksum : 0; }
};
/** The chunk index map (the one being used for mapping the chunk's checksum to their index in the chunk array) */
typedef Container::RobinHoodHashTable<uint32, ChecksumType, IntegerHashingPolicyForChecksum, SmallBucket> ChunkIndexMap;
/** The chunks block */
struct Chunks
{
/** The block header */
DataHeader header;
/** The chunk array revision */
uint32 revision;
/** The sorted array of chunk's structure */
Container::PlainOldData<Chunk>::Array chunks;
/** (internal) Is the list mapped - readonly */
bool mapped;
/** Check correctness of this information for testing purpose */
bool isCorrect(const uint64 fileSize, const uint64 fileOffset = 0) const { return header.isCorrect(fileSize, fileOffset) && fileSize >= getSize() + fileOffset; }
/** Get the structure size (as some have optional fields) */
uint64 getSize() const { return sizeof(header) + sizeof(revision) + chunks.getSize() * sizeof(Chunk); }
/** Load the structure from the given memory pointer */
bool load(const uint8 * ptr, const uint64 size)
{
if (size <= sizeof(header) + sizeof(revision)) return false; // Check if we can read the header
memcpy(this, ptr, sizeof(header)+sizeof(revision));
size_t elemCount = (header.getSize() - sizeof(header) - sizeof(revision)) / sizeof(Chunk);
if (size < elemCount) return false;
// Then read all (remaining chunks)
if (mapped) (void)chunks.getMovable(); // If mapped from a readonly area, release it
else chunks.Clear();
chunks.Grow(elemCount, (Chunk*)(ptr + sizeof(header) + sizeof(revision)));
return true;
}
/** Load from read-only memory without making a copy of the memory area.
@warning After this call, the array cannot be modified as it points to fixed memory it did not allocate
@warning You must call this method again with 0 as the pointer to tell the system to release the array without actually freeing the area */
bool loadReadOnly(const uint8 * ptr, const uint64 size)
{
if (ptr == 0) { (void)chunks.getMovable(); return true; }
if (size <= sizeof(header) + sizeof(revision)) return false; // Check if we can read the header
memcpy(this, ptr, sizeof(header)+sizeof(revision));
size_t elemCount = (header.getSize() - sizeof(header) - sizeof(revision)) / sizeof(Chunk);
if (size < header.getSize()) return false;
// Capture the pointer
Container::PlainOldData<Chunk>::Array::Internal intern = { (Chunk*)(ptr + sizeof(header)+sizeof(revision)), elemCount, elemCount };
chunks.Clear(); // Avoid any leak
// Rebuild the array with the captured data
new (&chunks) Container::PlainOldData<Chunk>::Array(intern);
mapped = true;
return true;
}
/** Write the structure to the given memory pointer */
void write(uint8 * ptr) { if (mapped) return; header.setSize(getSize()); memcpy(ptr, this, sizeof(header) + sizeof(revision)); memcpy(ptr + sizeof(header) + sizeof(revision), &chunks.getElementAtUncheckedPosition(0), chunks.getSize() * sizeof(Chunk)); }
/** Clear the array */
inline void Clear() { if (mapped) (void)chunks.getMovable(); else chunks.Clear(); }
/** Find a chunk ID from its checksum (returns (uint32)-1 if not found) */
uint32 findChunk(Chunk & chunk) const { size_t pos = chunks.indexOfSorted(chunk, 0); return pos == chunks.getSize() ? (uint32)-1 : chunks[pos].UID; }
/** Dump the chunks (not the inner content, it's too much work) */
String dump() const
{
String ret = header.dump() + String::Print(" Chunks rev: %u, count: %u\n", revision, (uint32)chunks.getSize());
for (size_t i = 0; i < chunks.getSize(); i++) ret += String::Print(" Chunk UID: %u, multichunk ID: %u, size: %u\n", chunks[i].UID, chunks[i].multichunkID, chunks[i].size);
return ret;
}
Chunks(const uint32 revision = 0) : header(DataHeader::Chunk), revision(revision), mapped(false) {}
~Chunks() { if (mapped) (void)chunks.getMovable(); }
};
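// A sketch of looking a chunk up by checksum and size in a Chunks block (illustration only; it
// assumes the chunks array is kept sorted as findChunk() requires).
#if 0 // documentation sketch, not compiled
inline uint32 exampleFindChunk(const Chunks & block, const uint8 (&checksum)[20], const uint16 size)
{
    Chunk probe(checksum, size);
    return block.findChunk(probe);   // Returns the chunk's UID, or (uint32)-1 if it's not in this block
}
#endif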
/** The chunk list block */
struct ChunkList
{
/** The block header */
DataHeader header;
union
{
struct
{
/** The list unique identifier */
uint32 UID : 31;
/** If the offset field is present in this object */
uint32 offset : 1;
};
uint32 _h; // This is for sizeof
};
/** The array of chunks UID */
Container::PlainOldData<uint32>::Array chunksID;
/** The array of offset in the chunk (this is used to have O(1) search in multichunks instead of O(N)) */
Container::PlainOldData<uint32>::Array offsets;
/** Check correctness of this information for testing purpose */
bool isCorrect(const uint64 fileSize, const uint64 fileOffset = 0) const { return header.isCorrect(fileSize, fileOffset) && fileSize >= getSize() + fileOffset; }
/** Get the structure size (as some have optional fields) */
uint64 getSize() const { return sizeof(header) + sizeof(_h) + chunksID.getSize() * sizeof(uint32) + offsets.getSize() * sizeof(uint32); }
/** Load the structure from the given memory pointer */
bool load(const uint8 * ptr, const uint64 size)
{
if (size <= sizeof(header) + sizeof(_h)) return false; // Check if we can read the header
memcpy(this, ptr, sizeof(header)+sizeof(_h));
size_t elemCount = offset ? (header.getSize() - sizeof(header) - sizeof(_h)) / (2*sizeof(uint32)) : (header.getSize() - sizeof(header) - sizeof(_h)) / sizeof(uint32);
if (size < header.getSize()) return false;
// Then read all (remaining chunks)
chunksID.Clear();
chunksID.Grow(elemCount, (uint32*)(ptr + sizeof(header) + sizeof(_h)));
offsets.Clear();
if (offset) offsets.Grow(elemCount, (uint32*)(ptr + sizeof(header) + sizeof(_h) + elemCount * sizeof(uint32)));
return true;
}
/** Append a chunk ID and optional offset to the list */
void appendChunk(const uint32 ID, const uint32 off = 0)
{
chunksID.Append(ID);
if (offset) offsets.Append(off);
}
/** Write the structure to the given memory pointer */
void write(uint8 * ptr)
{
header.setSize(getSize());
memcpy(ptr, this, sizeof(header) + sizeof(_h));
memcpy(ptr + sizeof(header) + sizeof(_h), &chunksID.getElementAtUncheckedPosition(0), chunksID.getSize() * sizeof(uint32));
memcpy(ptr + sizeof(header) + sizeof(_h) + chunksID.getSize() * sizeof(uint32), &offsets.getElementAtUncheckedPosition(0), offsets.getSize() * sizeof(uint32));
}
/** Find the offset for the given chunk UID (this only works for lists with offsets, obviously).
This performs an O(N) search on the list */
size_t getChunkOffset(const uint32 chunkID) const
{
if (offset)
{
size_t pos = chunksID.indexOf(chunkID);
if (pos != chunksID.getSize()) return (size_t)offsets.getElementAtPosition(pos);
}
return (size_t)-1;
}
/** Dump this list */
String dump() const
{
String ret = header.dump();
ret += String::Print(" Chunklist with UID: %u (chunks count: %u, offsets count: %u)\n", UID, (uint32)chunksID.getSize(), (uint32)offsets.getSize());
for (size_t i = 0; i < chunksID.getSize(); i++) ret += String::Print(" Chunk %u with UID: %u and offset %u\n", (uint32)i, chunksID[i], offsets[i]);
return ret;
}
ChunkList(const uint32 UID = 0, const bool withOffset = false) : header(DataHeader::ChunkList), UID(UID), offset(withOffset ? 1 : 0) {}
};
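// A sketch of filling a chunk list with offsets, as a multichunk's list would be, and querying a
// chunk's offset back (illustration only; the UIDs and offsets are made up).
#if 0 // documentation sketch, not compiled
inline void exampleChunkList()
{
    ChunkList list(42, true);             // UID 42, with the optional offset array
    list.appendChunk(7, 0);               // Chunk UID 7 starts at offset 0 in the multichunk
    list.appendChunk(9, 1024);            // Chunk UID 9 starts at offset 1024
    size_t off = list.getChunkOffset(9);  // 1024, or (size_t)-1 for an unknown UID
}
#endif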
/** The Chunk Lists (it's a hash table of ChunkList which follow each other in the file). The size of this array is stored in the catalog */
typedef Container::HashTable<ChunkList, uint32> ChunkLists;
/** Multichunks block. */
struct Multichunk
{
/** The block header */
DataHeader header;
/** The chunk list ID used */
uint32 listID;
/** The multichunk unique identifier */
uint16 UID;
/** The filter argument's index in the filter argument's object */
uint16 filterArgIndex;
/** The checksum for this object (SHA-256) */
uint8 checksum[32];
/** Check correctness of this information for testing purpose */
bool isCorrect(const uint64 fileSize, const uint64 fileOffset = 0) const { return header.isCorrect(fileSize, fileOffset) && fileSize >= getSize() + fileOffset; }
/** Get the structure size (as some have optional fields) */
static uint64 getSize() { return sizeof(Multichunk); }
/** Load the structure from the given memory pointer */
bool load(const uint8 * ptr, const uint64 size) { if (size <= getSize()) return false; memcpy(this, ptr, getSize()); return true; }
/** Write the structure to the given memory pointer */
void write(uint8 * ptr) { memcpy(ptr, this, getSize()); }
/** Get the file base name for this multichunk */
String getFileName() const;
/** Dump this list */
String dump() const
{
return header.dump() + String::Print(" Multichunk UID: %u, chunklist ID: %u, argIndex: %u, checksum: %s\n", UID, listID, filterArgIndex, (const char*)Helpers::fromBinary(checksum, sizeof(checksum), false));
}
Multichunk(const uint16 UID = 0) : header(DataHeader::Multichunk, (uint32)getSize()), UID(UID), listID(0), filterArgIndex(0) { memset(checksum, 0, ArrSz(checksum)); }
};
/** The multichunks (it's a hash table of Multichunks which follow each other in the file). The size of this array is stored in the catalog */
typedef Container::HashTable<Multichunk, uint16> Multichunks;
/** The multichunks from the previous revisions */
typedef Container::HashTable<Multichunk, uint16, Container::NoHashKey<uint16>, Container::NoDeletion<Multichunk> > MultichunksRO;
/** The filter arguments. Usually, there's only one of them in the index file */
struct FilterArguments
{
/** The block header */
DataHeader header;
/** The filter arguments */
Strings::StringArray arguments;
/** Check if this list was modified */
bool modified;
/** Check correctness of this information for testing purpose */
bool isCorrect(const uint64 fileSize, const uint64 fileOffset = 0) const { return header.isCorrect(fileSize, fileOffset) && fileSize >= getSize() + fileOffset; }
/** Get the structure size (as some have optional fields) */
uint64 getSize() const { return sizeof(header) + ((arguments.Join().getLength()+1 + 3) & ~3); } // Need to round up to 4-bytes limit
/** Load the structure from the given memory pointer */
bool load(const uint8 * ptr, const uint64 size)
{
if (size <= sizeof(header)) return false; // Check if we can read the header
memcpy(this, ptr, sizeof(header));
if (size < header.getSize()) return false;
Strings::FastString args((const char*)(ptr + sizeof(header)), (int)header.getSize());
arguments.Clear();
arguments.appendLines((const char*)args, "\n");
modified = false;
return true;
}
/** Write the structure to the given memory pointer */
void write(uint8 * ptr)
{
header.setSize(getSize());
memcpy(ptr, this, sizeof(header));
const Strings::FastString & res = arguments.Join();
memcpy(ptr+sizeof(header), (const char*)res, (res.getLength()+1 + 3) & ~3);
}
/** Append an argument to the filters */
uint16 appendArgument(const String & argument) { modified = true; return (uint16)arguments.appendIfNotPresent(argument.Trimmed()); }
/** Get the index for a given argument or arguments.getSize() if not found */
uint16 getArgumentIndex(const String & argument) { return arguments.indexOf(argument); }
/** Get the argument for the given index */
const String & getArgument(const uint16 index) { return arguments[(size_t)index]; }
/** Clear the arguments */
void Reset() { modified = false; arguments.Clear(); }
/** Dump the object */
String dump() const
{
String ret = header.dump();
return ret + String::Print(" modified: %s\n ", modified ? "true" : "false") + arguments.Join("\n ") + "\n";
}
FilterArguments() : header(DataHeader::FilterArgument), modified(false) { }
};
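// A sketch of how a multichunk's filter argument index is resolved through this block
// (illustration only; "zlib:5" is just a made-up argument string).
#if 0 // documentation sketch, not compiled
inline void exampleFilterArguments()
{
    FilterArguments args;
    uint16 idx = args.appendArgument("zlib:5");      // Stored once; returns its index
    uint16 again = args.getArgumentIndex("zlib:5");  // Same index as above
    const String & back = args.getArgument(idx);     // "zlib:5"
}
#endif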
/** The metadata about this backup */
struct MetaData
{
/** The block header */
DataHeader header;
/** The initial backup path will be at position 0 in the array */
Strings::StringArray info;
/** Check if this object was modified */
bool modified;
/** Check correctness of this information for testing purpose */
bool isCorrect(const uint64 fileSize, const uint64 fileOffset = 0) const { return header.isCorrect(fileSize, fileOffset) && fileSize >= getSize() + fileOffset; }
/** Get the structure size (as some have optional fields) */
uint64 getSize() const { return sizeof(header) + ((info.Join().getLength() + 3) & ~3); } // Need to round up to 4-bytes limit
/** Get the backup path (first item in array) */
const Strings::FastString & getBackupPath() const { return info[0]; }
/** Append a line to the metadata array */
void Append(const String & line) { modified = true; info.Append(line); }
/** Load the structure from the given memory pointer */
bool load(const uint8 * ptr, const uint64 size)
{
info.Clear();
if (!loadReadOnly(ptr, size)) return false;
modified = false;
return true;
}
/** Load the structure from the given memory pointer.
@warning We are reusing this interface name, but it appends to the current info instead of reloading it */
bool loadReadOnly(const uint8 * ptr, const uint64 size)
{
if (size <= sizeof(header)) return false; // Check if we can read the header
memcpy(this, ptr, sizeof(header));
if (size < header.getSize()) return false;
Strings::FastString args((const char*)(ptr + sizeof(header)));
info.appendLines(args, "\n");
modified = true;
return true;
}
/** Write the structure to the given memory pointer */
void write(uint8 * ptr)
{
header.setSize(getSize());
memcpy(ptr, this, sizeof(header));
const Strings::FastString & res = info.Join();
memcpy(ptr+sizeof(header), (const char*)res, (res.getLength() + 3) & ~3);
}
/** Find a metadata that starts with the given key */
String findKey(const String & key) const { for (size_t i = 0; i < info.getSize(); i++) if (info[i].upToFirst(":") == key) return info[i]; return ""; }
/** Reset a metadata object */
void Reset() { modified = false; info.Clear(); }
/** Dump the object */
String dump() const
{
String ret = header.dump();
return ret + " " + info.Join("\n ") + "\n";
}
MetaData() : header(DataHeader::Metadata), modified(false) { }
};
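// A sketch of storing and retrieving backup metadata (illustration only; the path and the
// "Description:" key below are made up, not a format requirement).
#if 0 // documentation sketch, not compiled
inline void exampleMetaData()
{
    MetaData meta;
    meta.Append("/home/user/documents");             // Position 0 is the initial backup path
    meta.Append("Description: weekly backup");
    String desc = meta.findKey("Description");       // Returns the full "Description: ..." line
    const Strings::FastString & root = meta.getBackupPath();
}
#endif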
/** The file tree.
The file tree is also stored as an array, but unlike the database format, we store the complete tree for each revision to avoid
having to consolidate the file tree.
So, for a backup tree of 100k files, each file consumes:
- 4 bytes for the UID
- 4 bytes for the parent UID
- 4 bytes for the chunk list's UID
- ~60 bytes (average) for the file's metadata
- ~22 bytes (average) for the file's base name
= 94 bytes per file on average => 9.4MB of data for 100k files
Each file's record is variable size, because we want to avoid storing a useless "length" field.
The metadata size is known when parsing starts on the metadata field, so it does not require any additional length storage.
The file's name is zero-terminated. The last entry in a file tree block might get padded with zeros so it ends on a 32-bit boundary */
struct FileTree
{
/** The block header */
DataHeader header;
/** The current revision for this file tree */