-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path0_create_manifest.py
executable file
·57 lines (50 loc) · 1.82 KB
/
0_create_manifest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
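# Creates a manifest.json file in every folder that directly contains files,
# walking the whole folder hierarchy below the current working directory.
# The format is based on the bdbag remote file manifest:
# https://github.com/fair-research/bdbag/blob/master/doc/config.md#remote-file-manifest
# Example (note: this script records a "sha512" checksum instead of "sha256"):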
# [
# {
# "url":"https://raw.githubusercontent.com/fair-research/bdbag/master/profiles/bdbag-profile.json",
# "length":699,
# "filename":"bdbag-profile.json",
# "sha256":"eb42cbc9682e953a03fe83c5297093d95eec045e814517a4e891437b9b993139"
# },
# {
# "url":"ark:/88120/r8059v",
# "length": 632860,
# "filename": "minid_v0.1_Nov_2015.pdf",
# "sha256": "cacc1abf711425d3c554277a5989df269cefaa906d27f1aaa72205d30224ed5f"
# }
# ]
import os
import sys
import json
import glob
import hashlib
# Files are hashed in 64 MiB chunks. The digest does not depend on the chunk
# size, and the previous 4 GiB chunk required several GiB of RAM per read.
BUFFER = 64 * 1024 * 1024

# Name of the manifest written into each folder. It is never listed in a
# manifest itself: it is rewritten on every run, so its checksum would be stale.
MANIFEST_NAME = 'manifest.json'


def _sha512_of_file(path, chunk_size=BUFFER):
    """Return the hexadecimal SHA-512 digest of the file at *path*."""
    digest = hashlib.sha512()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the empty bytes object read at EOF.
        for data in iter(lambda: f.read(chunk_size), b""):
            digest.update(data)
    return digest.hexdigest()


def _manifest_entry(dirpath, rel_dir, filename, baseurl):
    """Build the manifest record for one file.

    *dirpath* is the folder as reported by ``os.walk`` (used to open the
    file); *rel_dir* is the same folder relative to the walk root (used for
    the recorded filename and URL).
    """
    full_path = os.path.join(dirpath, filename)
    rel_path = os.path.join(rel_dir, filename)
    return {'sha512': _sha512_of_file(full_path),
            'filename': rel_path,
            # URLs always use forward slashes, whatever the local separator is.
            'url': baseurl + rel_path.replace(os.sep, "/"),
            'length': os.stat(full_path).st_size}


def create_manifests(baseurl, root="."):
    """Write a ``manifest.json`` into every folder under *root* that has files.

    Each manifest is a JSON list with one record per file of that folder,
    holding its SHA-512 checksum, its path relative to *root*, its URL
    (*baseurl* followed by that relative path) and its size in bytes.
    An existing ``manifest.json`` is overwritten and is not listed, so the
    script can be re-run on the same tree and produce the same result.
    The path of each folder that received a manifest is printed.

    Parameters:
        baseurl: URL prefix prepended verbatim to each relative path; it is
            expected to end with "/", for example
            "https://g-9fdb0b.6b7bd8.0ec8.data.globus.org/datareleases/dc0/mission/"
            (NERSC) or
            'https://g-456d30.0ed28.75bc.data.globus.org/datareleases/npipe6v20/fullsky/'
            (UCSD).
        root: top of the folder hierarchy to process; defaults to the
            current working directory.
    """
    for dirpath, _dirnames, filenames in os.walk(root):
        if not filenames:
            continue
        rel_dir = os.path.relpath(dirpath, root)
        if rel_dir == os.curdir:
            # Files directly in the root are recorded without a folder prefix.
            rel_dir = ""
        entries = [_manifest_entry(dirpath, rel_dir, filename, baseurl)
                   for filename in filenames
                   if filename != MANIFEST_NAME]
        with open(os.path.join(dirpath, MANIFEST_NAME), 'w') as f:
            json.dump(entries, f, indent=4)
        print(dirpath)


def main(argv=None):
    """Command-line entry point: ``0_create_manifest.py <host/path/>``.

    The single required argument is the base URL without its scheme;
    "https://" is prepended to it. Exits with a usage message when the
    argument is missing instead of failing with an IndexError.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("usage: 0_create_manifest.py <base url without https://, "
                 "ending with />")
    create_manifests("https://" + args[0])


if __name__ == "__main__":
    main()