-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdocker_pull_image.py
149 lines (126 loc) · 5.73 KB
/
docker_pull_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import requests
import os
import tarfile
import json
import hashlib
import argparse
import sys
def get_auth_token(repo):
    """Request an anonymous pull token for ``repo`` from the Docker Hub auth service.

    Args:
        repo: Repository path as used in the registry API
            (e.g. ``"library/ubuntu"``).

    Returns:
        The value of the ``token`` field of the JSON auth response.

    Raises:
        requests.HTTPError: If the auth service answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        KeyError: If a successful response carries no ``token`` field.
    """
    auth_url = f"https://auth.docker.io/token?service=registry.docker.io&scope=repository:{repo}:pull"
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(auth_url, timeout=30)
    # Fail with the real HTTP error instead of an opaque KeyError/JSON error
    # when the auth service rejects the request.
    response.raise_for_status()
    return response.json()["token"]
def _fetch_blob_to_file(url, headers, dest_path, timeout):
    """Stream a registry blob to ``dest_path``, hashing it while it is written.

    Returns:
        ``(status_code, digest)`` where ``digest`` is ``"sha256:<hex>"`` for a
        200 response and ``None`` for any other status (nothing is written
        in that case).
    """
    sha256 = hashlib.sha256()
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return response.status_code, None
        with open(dest_path, "wb") as out:
            for chunk in response.iter_content(chunk_size=65536):
                out.write(chunk)
                # Hash incrementally so the layer is never held in memory whole.
                sha256.update(chunk)
    return 200, f"sha256:{sha256.hexdigest()}"


def _add_and_remove(tar, path, arcname):
    """Add ``path`` to ``tar`` under ``arcname`` and delete the scratch file."""
    tar.add(path, arcname=arcname)
    os.remove(path)


def download_docker_image(image, tag, save_path=None, architecture="amd64"):
    """Download an image from Docker Hub and save it as a ``docker load`` tarball.

    The archive contains the image config, one ``layer_<i>.tar.gz`` per layer
    (stored exactly as served by the registry) and a ``manifest.json`` that
    ties them together.

    Args:
        image: Repository name. Names without a ``/`` are looked up under
            ``library/``.
        tag: Tag (or digest) passed to the registry manifest endpoint.
        save_path: Output archive path. Defaults to
            ``<last component of image>_<tag>.tar`` in the current directory.
        architecture: Architecture to select (together with ``os == "linux"``)
            when the tag resolves to a multi-architecture index.

    Returns:
        None. Registry errors are reported on stdout and the function returns
        early; a partially written archive is removed.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Function-scope import: the module's import block does not provide tempfile.
    import tempfile

    registry = "https://registry-1.docker.io/v2"
    timeout = 30  # seconds; for streamed bodies this bounds each read, not the total
    index_types = (
        "application/vnd.docker.distribution.manifest.list.v2+json",
        "application/vnd.oci.image.index.v1+json",
    )
    image_manifest_types = (
        "application/vnd.docker.distribution.manifest.v2+json",
        "application/vnd.oci.image.manifest.v1+json",
    )

    repo = image if "/" in image else f"library/{image}"
    token = get_auth_token(repo)
    headers = {
        "Authorization": f"Bearer {token}",
        # The OCI index type must be advertised too, otherwise images published
        # only as an OCI index cannot be negotiated even though they are
        # handled below.
        "Accept": ",".join(index_types + image_manifest_types),
    }

    response = requests.get(
        f"{registry}/{repo}/manifests/{tag}", headers=headers, timeout=timeout
    )
    if response.status_code != 200:
        print(f"Error getting manifest: {response.status_code}")
        print(response.text)
        return

    # Compare only the media type; ignore parameters such as "; charset=utf-8".
    content_type = (response.headers.get("Content-Type") or "").split(";", 1)[0].strip()
    manifest = response.json()

    if content_type in index_types:
        print("Multi-architecture image detected. Selecting appropriate manifest...")
        manifest_digest = next(
            (
                entry["digest"]
                for entry in manifest.get("manifests", [])
                if entry.get("platform", {}).get("architecture") == architecture
                and entry.get("platform", {}).get("os") == "linux"
            ),
            None,
        )
        if manifest_digest is None:
            print(f"No manifest found for architecture: {architecture}")
            return
        headers["Accept"] = ",".join(image_manifest_types)
        response = requests.get(
            f"{registry}/{repo}/manifests/{manifest_digest}",
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"Error getting manifest for architecture {architecture}: {response.status_code}")
            print(response.text)
            return
        manifest = response.json()

    if "layers" not in manifest:
        print("Error: Unexpected manifest structure")
        print(json.dumps(manifest, indent=2))
        return

    if "config" in manifest:
        config_digest = manifest["config"]["digest"]
        config_response = requests.get(
            f"{registry}/{repo}/blobs/{config_digest}", headers=headers, timeout=timeout
        )
        if config_response.status_code != 200:
            print(f"Error getting image config: {config_response.status_code}")
            print(config_response.text)
            return
        # Keep the config byte-for-byte: re-serialising the JSON would change
        # its hash and therefore the image ID seen after `docker load`.
        config_bytes = config_response.content
        config_file = f"{config_digest.split(':', 1)[-1]}.json"
    else:
        print("Warning: No config found in manifest. Using empty config.")
        config_bytes = b"{}"
        config_file = "config.json"

    layers = manifest["layers"]
    # Manifest sizes describe the blobs as stored in the registry (normally
    # gzip-compressed), not the unpacked layer contents.
    total_size = sum(layer["size"] for layer in layers)
    print(f"Total download size of all layers: {total_size / 1024 / 1024:.2f} MB")

    if save_path is None:
        save_path = f"{image.split('/')[-1]}_{tag}.tar"

    layer_names = [f"layer_{i}.tar.gz" for i in range(len(layers))]
    manifest_json = [{
        "Config": config_file,
        "RepoTags": [f"{image}:{tag}"],
        "Layers": layer_names,
    }]

    # Scratch files live in a private directory next to the output archive:
    # same filesystem as the result, no clobbering of files in the current
    # directory, and automatic cleanup even when a download raises.
    scratch_parent = os.path.dirname(os.path.abspath(save_path))
    with tempfile.TemporaryDirectory(dir=scratch_parent) as scratch:
        complete = False
        tar = tarfile.open(save_path, "w")
        try:
            config_path = os.path.join(scratch, config_file)
            with open(config_path, "wb") as f:
                f.write(config_bytes)
            _add_and_remove(tar, config_path, config_file)

            for i, layer in enumerate(layers):
                layer_digest = layer["digest"]
                layer_path = os.path.join(scratch, layer_names[i])
                status, actual_digest = _fetch_blob_to_file(
                    f"{registry}/{repo}/blobs/{layer_digest}",
                    headers,
                    layer_path,
                    timeout,
                )
                if status != 200:
                    # Never pack an HTTP error body into the archive as a layer.
                    print(f"Error downloading layer {i}: {status}")
                    break

                # Verify layer integrity (a mismatch is reported, not fatal).
                if actual_digest != layer_digest:
                    print(f"Warning: Layer {i} hash mismatch!")
                else:
                    print(f"Layer {i} verified successfully.")
                _add_and_remove(tar, layer_path, layer_names[i])
            else:
                manifest_path = os.path.join(scratch, "manifest.json")
                with open(manifest_path, "w") as f:
                    json.dump(manifest_json, f)
                _add_and_remove(tar, manifest_path, "manifest.json")
                complete = True
        finally:
            tar.close()
            if not complete:
                # Do not leave a truncated, unloadable archive behind.
                os.remove(save_path)

    if not complete:
        return

    print(f"{image}:{tag} image ({architecture}) downloaded and saved as {save_path}")
    print(f"Compressed tar size: {os.path.getsize(save_path) / 1024 / 1024:.2f} MB")
    print(f"Load the image using: docker load -i {save_path}")
def parse_image_reference(reference):
    """Split a ``name[:tag]`` image reference at the first colon.

    Args:
        reference: Image reference as typed on the command line,
            e.g. ``"ubuntu:20.04"`` or ``"nginx"``.

    Returns:
        ``(name, tag)``; ``tag`` is ``None`` when the reference has no colon.

    Raises:
        ValueError: If the name is empty, or a colon is present but the tag
            after it is empty.
    """
    name, colon, tag = reference.partition(":")
    if not name or (colon and not tag):
        raise ValueError(f"invalid image reference: {reference!r}")
    return name, (tag if colon else None)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download Docker images")
    parser.add_argument("image", help="Docker image to download (e.g., 'ubuntu:20.04' or 'nginx:latest')")
    parser.add_argument("-o", "--output", help="Output file name (default: <image>_<tag>.tar)")
    parser.add_argument("-a", "--arch", default="amd64", help="Architecture (default: amd64)")
    args = parser.parse_args()

    # Reject malformed references (":tag", "name:", "") up front instead of
    # sending them to the registry.
    try:
        image, tag = parse_image_reference(args.image)
    except ValueError:
        print("Error: Image should be in the format 'name:tag'")
        sys.exit(1)

    if tag is None:
        tag = "latest"
        print(f"No tag specified, using latest tag: {tag}")

    download_docker_image(image, tag, args.output, args.arch)