diff --git a/src/taskgraph/docker.py b/src/taskgraph/docker.py index fdb2020c..5f715e32 100644 --- a/src/taskgraph/docker.py +++ b/src/taskgraph/docker.py @@ -122,7 +122,7 @@ def build_image(name, tag, args=None): tag = tag or docker.docker_image(name, by_tag=True) buf = BytesIO() - docker.stream_context_tar(".", image_dir, buf, "", args) + docker.stream_context_tar(".", image_dir, buf, args) cmdargs = ["docker", "image", "build", "--no-cache", "-"] if tag: cmdargs.insert(-1, f"-t={tag}") diff --git a/src/taskgraph/util/archive.py b/src/taskgraph/util/archive.py index b4101ca6..e39119db 100644 --- a/src/taskgraph/util/archive.py +++ b/src/taskgraph/util/archive.py @@ -7,6 +7,7 @@ import os import stat import tarfile +from contextlib import contextmanager # 2016-01-01T00:00:00+0000 DEFAULT_MTIME = 1451606400 @@ -104,14 +105,14 @@ def create_tar_from_files(fp, files): tf.addfile(ti, f) -def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9): - """Create a tar.gz file deterministically from files. +@contextmanager +def gzip_compressor(fp, filename=None, compresslevel=9): + """Create a deterministic GzipFile writer. - This is a glorified wrapper around ``create_tar_from_files`` that - adds gzip compression. + This is a glorified wrapper around ``GzipFile`` that adds some + determinism. The passed file handle should be opened for writing in binary mode. - When the function returns, all data has been written to the handle. """ # Offset 3-7 in the gzip header contains an mtime. Pin it to a known # value so output is deterministic. @@ -123,4 +124,17 @@ def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9): mtime=DEFAULT_MTIME, ) with gf: + yield gf + + +def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9): + """Create a tar.gz file deterministically from files. + + This is a glorified wrapper around ``create_tar_from_files`` that + adds gzip compression. + + The passed file handle should be opened for writing in binary mode. + When the function returns, all data has been written to the handle. + """ + with gzip_compressor(fp, filename, compresslevel) as gf: create_tar_from_files(gf, files) diff --git a/src/taskgraph/util/docker.py b/src/taskgraph/util/docker.py index e362aece..88c5d662 100644 --- a/src/taskgraph/util/docker.py +++ b/src/taskgraph/util/docker.py @@ -10,7 +10,7 @@ import re from typing import Optional -from taskgraph.util.archive import create_tar_gz_from_files +from taskgraph.util.archive import create_tar_from_files, gzip_compressor IMAGE_DIR = os.path.join(".", "taskcluster", "docker") @@ -76,10 +76,15 @@ class HashingWriter: def __init__(self, writer): self._hash = hashlib.sha256() self._writer = writer + self._written = 0 def write(self, buf): self._hash.update(buf) self._writer.write(buf) + self._written += len(buf) + + def tell(self): + return self._written def hexdigest(self): return self._hash.hexdigest() @@ -108,13 +113,8 @@ def create_context_tar(topsrcdir, context_dir, out_path, args=None): Returns the SHA-256 hex digest of the created archive. """ with open(out_path, "wb") as fh: - return stream_context_tar( - topsrcdir, - context_dir, - fh, - image_name=os.path.basename(out_path), - args=args, - ) + with gzip_compressor(fh, filename=os.path.basename(out_path)) as gf: + return stream_context_tar(topsrcdir, context_dir, gf, args=args) RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task") @@ -135,7 +135,7 @@ def create_context_tar(topsrcdir, context_dir, out_path, args=None): ] -def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None): +def stream_context_tar(topsrcdir, context_dir, out_file, args=None): """Like create_context_tar, but streams the tar file to the `out_file` file object.""" archive_files = {} @@ -201,7 +201,7 @@ def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=N archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8")) writer = HashingWriter(out_file) - create_tar_gz_from_files(writer, archive_files, image_name) + create_tar_from_files(writer, archive_files) return writer.hexdigest() diff --git a/test/test_util_docker.py b/test/test_util_docker.py index 18034bbe..977f7d62 100644 --- a/test/test_util_docker.py +++ b/test/test_util_docker.py @@ -39,7 +39,7 @@ def test_generate_context_hash(self): docker.generate_context_hash( tmpdir, os.path.join(tmpdir, "docker/my-image"), "my-image" ), - "e1649b3427bd7a0387f4508d25057c2e89228748517aad6c70e3df54f47bd13a", + "ab46d51b191eb6c595cccf1fa02485b4e1decc6ba9737a8b8613038d3661be52", ) finally: shutil.rmtree(tmpdir) @@ -98,9 +98,9 @@ def test_create_context_tar_basic(self): os.chmod(os.path.join(d, "extra"), MODE_STANDARD) tp = os.path.join(tmp, "tar") - h = docker.create_context_tar(tmp, d, tp, "my_image") + h = docker.create_context_tar(tmp, d, tp) self.assertEqual( - h, "6c1cc23357625f64f775a08eace7bbc3877dd08d2f3546e0f2e308bac8491865" + h, "3134fa88c39a604132b260c2c3cf09f6fe4a8234475a4272fd9438aac47caaae" ) # File prefix should be "my_image" @@ -131,9 +131,9 @@ def test_create_context_topsrcdir_files(self): os.chmod(os.path.join(extra, "file0"), MODE_STANDARD) tp = os.path.join(tmp, "tar") - h = docker.create_context_tar(tmp, d, tp, "test_image") + h = docker.create_context_tar(tmp, d, tp) self.assertEqual( - h, "e7f14044b8ec1ba42e251d4b293af212ad08b30ec8ab6613abbdbe73c3c2b61f" + h, "56657d2f428fe268cc3b0966649a3bf8477dcd2eade7a1d4accc5c312f075a2f" ) with tarfile.open(tp, "r:gz") as tf: @@ -158,7 +158,7 @@ def test_create_context_absolute_path(self): fh.write(b"# %include /etc/shadow\n") with self.assertRaisesRegex(Exception, "cannot be absolute"): - docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"), "test") + docker.create_context_tar(tmp, d, os.path.join(tmp, "tar")) finally: shutil.rmtree(tmp) @@ -172,7 +172,7 @@ def test_create_context_outside_topsrcdir(self): fh.write(b"# %include foo/../../../etc/shadow\n") with self.assertRaisesRegex(Exception, "path outside topsrcdir"): - docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"), "test") + docker.create_context_tar(tmp, d, os.path.join(tmp, "tar")) finally: shutil.rmtree(tmp) @@ -186,7 +186,7 @@ def test_create_context_missing_extra(self): fh.write(b"# %include does/not/exist\n") with self.assertRaisesRegex(Exception, "path does not exist"): - docker.create_context_tar(tmp, d, os.path.join(tmp, "tar"), "test") + docker.create_context_tar(tmp, d, os.path.join(tmp, "tar")) finally: shutil.rmtree(tmp) @@ -214,10 +214,10 @@ def test_create_context_extra_directory(self): os.chmod(os.path.join(tmp, "file0"), MODE_STANDARD) tp = os.path.join(tmp, "tar") - h = docker.create_context_tar(tmp, d, tp, "my_image") + h = docker.create_context_tar(tmp, d, tp) self.assertEqual( - h, "d2a3363b15d0eb547a6c81a72ddf3980e2f6e6360c29b4fb6818102896f43180" + h, "ba7b62e8f25977e8e6629aee1d121ae92b6007258c90a53fb94c8607e1e96e10" ) with tarfile.open(tp, "r:gz") as tf: @@ -261,11 +261,11 @@ def test_stream_context_tar(self): # file objects are BufferedRandom instances out_file = BufferedRandom(BytesIO(b"")) h = docker.stream_context_tar( - tmp, d, out_file, "my_image", args={"PYTHON_VERSION": "3.8"} + tmp, d, out_file, args={"PYTHON_VERSION": "3.8"} ) self.assertEqual( - h, "e015aabf2677d90fee777c8813fd69402309a2d49bcdff2c28428134a53e36be" + h, "ba7b62e8f25977e8e6629aee1d121ae92b6007258c90a53fb94c8607e1e96e10" ) finally: shutil.rmtree(tmp)