Merge pull request #626 from glandium/context-tar
Hash the uncompressed contents of docker context tar
jcristau authored Jan 15, 2025
2 parents 588dc4f + dffc5e0 commit 3001078
Showing 4 changed files with 42 additions and 28 deletions.
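
In short, the context digest used to be computed over the gzip-compressed stream, so the gzip header (which embeds the archive's filename, i.e. the image name, plus an mtime and compression parameters) fed into the hash. After this commit it is computed over the raw tar bytes, so compression details no longer influence it. A rough illustration, not the project's code (the tar bytes are a placeholder):

import gzip
import hashlib

tar_bytes = b"..."  # placeholder for the uncompressed docker context tar

# Before: the digest covered the gzip output, so the gzip header leaked into it.
old_style = hashlib.sha256(gzip.compress(tar_bytes, mtime=0)).hexdigest()

# After: the digest covers the tar itself; gzip (if any) happens outside the hash.
new_style = hashlib.sha256(tar_bytes).hexdigest()
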
2 changes: 1 addition & 1 deletion src/taskgraph/docker.py
@@ -122,7 +122,7 @@ def build_image(name, tag, args=None):
     tag = tag or docker.docker_image(name, by_tag=True)
 
     buf = BytesIO()
-    docker.stream_context_tar(".", image_dir, buf, "", args)
+    docker.stream_context_tar(".", image_dir, buf, args)
     cmdargs = ["docker", "image", "build", "--no-cache", "-"]
     if tag:
         cmdargs.insert(-1, f"-t={tag}")
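
Since stream_context_tar no longer gzips, the buffer passed to `docker image build -` now holds a plain tar, which the docker CLI also accepts on stdin. A minimal sketch of that flow (the image directory and the subprocess call are illustrative, not the project's exact invocation):

import subprocess
from io import BytesIO

from taskgraph.util import docker

buf = BytesIO()
docker.stream_context_tar(".", "taskcluster/docker/my-image", buf)

# "-" tells docker to read the build context (a tar stream) from stdin.
subprocess.run(
    ["docker", "image", "build", "--no-cache", "-t=my-image", "-"],
    input=buf.getvalue(),
    check=True,
)
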
24 changes: 19 additions & 5 deletions src/taskgraph/util/archive.py
@@ -7,6 +7,7 @@
 import os
 import stat
 import tarfile
+from contextlib import contextmanager
 
 # 2016-01-01T00:00:00+0000
 DEFAULT_MTIME = 1451606400
@@ -104,14 +105,14 @@ def create_tar_from_files(fp, files):
             tf.addfile(ti, f)
 
 
-def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
-    """Create a tar.gz file deterministically from files.
+@contextmanager
+def gzip_compressor(fp, filename=None, compresslevel=9):
+    """Create a deterministic GzipFile writer.
 
-    This is a glorified wrapper around ``create_tar_from_files`` that
-    adds gzip compression.
+    This is a glorified wrapper around ``GzipFile`` that adds some
+    determinism.
 
     The passed file handle should be opened for writing in binary mode.
-    When the function returns, all data has been written to the handle.
     """
     # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
     # value so output is deterministic.
@@ -123,4 +124,17 @@ def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
         mtime=DEFAULT_MTIME,
     )
     with gf:
+        yield gf
+
+
+def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
+    """Create a tar.gz file deterministically from files.
+
+    This is a glorified wrapper around ``create_tar_from_files`` that
+    adds gzip compression.
+
+    The passed file handle should be opened for writing in binary mode.
+    When the function returns, all data has been written to the handle.
+    """
+    with gzip_compressor(fp, filename, compresslevel) as gf:
         create_tar_from_files(gf, files)
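
The new helper can also be used directly to produce a deterministic .tar.gz; a minimal sketch against the same API (the file names and contents here are made up):

from io import BytesIO

from taskgraph.util.archive import create_tar_from_files, gzip_compressor

# Hypothetical in-memory context: archive path -> file object.
files = {"Dockerfile": BytesIO(b"FROM alpine\n")}

with open("context.tar.gz", "wb") as fh:
    # gzip_compressor pins the gzip mtime (and takes an optional embedded
    # filename), so identical inputs yield byte-identical archives.
    with gzip_compressor(fh, filename="context.tar.gz") as gf:
        create_tar_from_files(gf, files)
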
20 changes: 10 additions & 10 deletions src/taskgraph/util/docker.py
@@ -10,7 +10,7 @@
 import re
 from typing import Optional
 
-from taskgraph.util.archive import create_tar_gz_from_files
+from taskgraph.util.archive import create_tar_from_files, gzip_compressor
 
 IMAGE_DIR = os.path.join(".", "taskcluster", "docker")

@@ -76,10 +76,15 @@ class HashingWriter:
     def __init__(self, writer):
         self._hash = hashlib.sha256()
         self._writer = writer
+        self._written = 0
 
     def write(self, buf):
         self._hash.update(buf)
         self._writer.write(buf)
+        self._written += len(buf)
+
+    def tell(self):
+        return self._written
 
     def hexdigest(self):
         return self._hash.hexdigest()
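
The new tell() matters because stream_context_tar now hands the HashingWriter straight to tarfile (via create_tar_from_files), and tarfile expects the file object it writes to to support tell(); previously the intervening GzipFile provided that. A small standalone sketch of the wrapper hashing an uncompressed tar stream (the member name and data are made up):

import tarfile
from io import BytesIO

from taskgraph.util.docker import HashingWriter

writer = HashingWriter(BytesIO())
with tarfile.open(name="", mode="w", fileobj=writer) as tf:
    data = b"hello\n"
    ti = tarfile.TarInfo("hello.txt")
    ti.size = len(data)
    tf.addfile(ti, BytesIO(data))

# Digest of the raw (uncompressed) tar bytes written through the wrapper.
print(writer.hexdigest())
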
@@ -108,13 +113,8 @@ def create_context_tar(topsrcdir, context_dir, out_path, args=None):
     Returns the SHA-256 hex digest of the created archive.
     """
     with open(out_path, "wb") as fh:
-        return stream_context_tar(
-            topsrcdir,
-            context_dir,
-            fh,
-            image_name=os.path.basename(out_path),
-            args=args,
-        )
+        with gzip_compressor(fh, filename=os.path.basename(out_path)) as gf:
+            return stream_context_tar(topsrcdir, context_dir, gf, args=args)
 
 
 RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
@@ -135,7 +135,7 @@ def create_context_tar(topsrcdir, context_dir, out_path, args=None):
 ]
 
 
-def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
+def stream_context_tar(topsrcdir, context_dir, out_file, args=None):
     """Like create_context_tar, but streams the tar file to the `out_file` file
     object."""
     archive_files = {}
@@ -201,7 +201,7 @@ def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
     archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))
 
     writer = HashingWriter(out_file)
-    create_tar_gz_from_files(writer, archive_files, image_name)
+    create_tar_from_files(writer, archive_files)
     return writer.hexdigest()


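
With this split, create_context_tar applies gzip outside the hashed stream: stream_context_tar wraps whatever file object it is given in a HashingWriter, so the digest covers only the uncompressed tar. Streaming the same context into a plain buffer should therefore produce the same digest as writing a .tar.gz to disk, which is consistent with the identical expected hashes in two of the updated tests below. A rough sketch (the paths are hypothetical):

from io import BytesIO

from taskgraph.util import docker

topsrcdir = "/path/to/checkout"
image_dir = "/path/to/checkout/taskcluster/docker/my-image"

# Digest of the context written to disk as a .tar.gz...
h1 = docker.create_context_tar(topsrcdir, image_dir, "/tmp/context.tar.gz")

# ...should match the digest of the same context streamed, uncompressed,
# into a buffer, because both hash the tar bytes before any gzip.
buf = BytesIO()
h2 = docker.stream_context_tar(topsrcdir, image_dir, buf)

assert h1 == h2
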
24 changes: 12 additions & 12 deletions test/test_util_docker.py
@@ -39,7 +39,7 @@ def test_generate_context_hash(self):
             docker.generate_context_hash(
                 tmpdir, os.path.join(tmpdir, "docker/my-image"), "my-image"
             ),
-            "e1649b3427bd7a0387f4508d25057c2e89228748517aad6c70e3df54f47bd13a",
+            "ab46d51b191eb6c595cccf1fa02485b4e1decc6ba9737a8b8613038d3661be52",
         )
     finally:
         shutil.rmtree(tmpdir)
@@ -98,9 +98,9 @@ def test_create_context_tar_basic(self):
             os.chmod(os.path.join(d, "extra"), MODE_STANDARD)
 
             tp = os.path.join(tmp, "tar")
-            h = docker.create_context_tar(tmp, d, tp, "my_image")
+            h = docker.create_context_tar(tmp, d, tp)
             self.assertEqual(
-                h, "6c1cc23357625f64f775a08eace7bbc3877dd08d2f3546e0f2e308bac8491865"
+                h, "3134fa88c39a604132b260c2c3cf09f6fe4a8234475a4272fd9438aac47caaae"
             )
 
             # File prefix should be "my_image"
@@ -131,9 +131,9 @@ def test_create_context_topsrcdir_files(self):
             os.chmod(os.path.join(extra, "file0"), MODE_STANDARD)
 
             tp = os.path.join(tmp, "tar")
-            h = docker.create_context_tar(tmp, d, tp, "test_image")
+            h = docker.create_context_tar(tmp, d, tp)
             self.assertEqual(
-                h, "e7f14044b8ec1ba42e251d4b293af212ad08b30ec8ab6613abbdbe73c3c2b61f"
+                h, "56657d2f428fe268cc3b0966649a3bf8477dcd2eade7a1d4accc5c312f075a2f"
             )
 
             with tarfile.open(tp, "r:gz") as tf:
@@ -158,7 +158,7 @@ def test_create_context_absolute_path(self):
                 fh.write(b"# %include /etc/shadow\n")
 
             with self.assertRaisesRegex(Exception, "cannot be absolute"):
-                docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"), "test")
+                docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"))
         finally:
             shutil.rmtree(tmp)

@@ -172,7 +172,7 @@ def test_create_context_outside_topsrcdir(self):
                 fh.write(b"# %include foo/../../../etc/shadow\n")
 
             with self.assertRaisesRegex(Exception, "path outside topsrcdir"):
-                docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"), "test")
+                docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"))
         finally:
             shutil.rmtree(tmp)

@@ -186,7 +186,7 @@ def test_create_context_missing_extra(self):
                 fh.write(b"# %include does/not/exist\n")
 
             with self.assertRaisesRegex(Exception, "path does not exist"):
-                docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"), "test")
+                docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"))
         finally:
             shutil.rmtree(tmp)

@@ -214,10 +214,10 @@ def test_create_context_extra_directory(self):
             os.chmod(os.path.join(tmp, "file0"), MODE_STANDARD)
 
             tp = os.path.join(tmp, "tar")
-            h = docker.create_context_tar(tmp, d, tp, "my_image")
+            h = docker.create_context_tar(tmp, d, tp)
 
             self.assertEqual(
-                h, "d2a3363b15d0eb547a6c81a72ddf3980e2f6e6360c29b4fb6818102896f43180"
+                h, "ba7b62e8f25977e8e6629aee1d121ae92b6007258c90a53fb94c8607e1e96e10"
             )
 
             with tarfile.open(tp, "r:gz") as tf:
@@ -261,11 +261,11 @@ def test_stream_context_tar(self):
             # file objects are BufferedRandom instances
             out_file = BufferedRandom(BytesIO(b""))
             h = docker.stream_context_tar(
-                tmp, d, out_file, "my_image", args={"PYTHON_VERSION": "3.8"}
+                tmp, d, out_file, args={"PYTHON_VERSION": "3.8"}
             )
 
             self.assertEqual(
-                h, "e015aabf2677d90fee777c8813fd69402309a2d49bcdff2c28428134a53e36be"
+                h, "ba7b62e8f25977e8e6629aee1d121ae92b6007258c90a53fb94c8607e1e96e10"
             )
         finally:
             shutil.rmtree(tmp)
