diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 108cb3772..1d7c03d0f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -85,19 +85,12 @@ jobs: id: cache-container-image uses: actions/cache@v4 with: - key: v4-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} + key: v5-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} path: | - share/container.tar.gz + share/container.tar share/image-id.txt - - name: Build and push Dangerzone image + - name: Build Dangerzone image if: ${{ steps.cache-container-image.outputs.cache-hit != 'true' }} run: | - sudo apt-get install -y python3-poetry python3 ./install/common/build-image.py - echo ${{ github.token }} | podman login ghcr.io -u USERNAME --password-stdin - gunzip -c share/container.tar.gz | podman load - tag=$(cat share/image-id.txt) - podman push \ - dangerzone.rocks/dangerzone:$tag \ - ${{ env.IMAGE_REGISTRY }}/dangerzone/dangerzone:tag diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0c32c9c40..69f92860a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,9 +59,9 @@ jobs: id: cache-container-image uses: actions/cache@v4 with: - key: v4-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} + key: v5-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} path: |- - share/container.tar.gz + share/container.tar share/image-id.txt - name: Build Dangerzone container image @@ -72,8 +72,8 @@ jobs: - name: Upload container image uses: actions/upload-artifact@v4 with: - name: container.tar.gz - path: 
share/container.tar.gz + name: container.tar + path: share/container.tar download-tessdata: name: Download and cache Tesseract data @@ -226,9 +226,9 @@ jobs: - name: Restore container cache uses: actions/cache/restore@v4 with: - key: v4-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} + key: v5-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} path: |- - share/container.tar.gz + share/container.tar share/image-id.txt fail-on-cache-miss: true @@ -333,9 +333,9 @@ jobs: - name: Restore container image uses: actions/cache/restore@v4 with: - key: v4-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} + key: v5-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} path: |- - share/container.tar.gz + share/container.tar share/image-id.txt fail-on-cache-miss: true @@ -428,9 +428,9 @@ jobs: - name: Restore container image uses: actions/cache/restore@v4 with: - key: v4-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} + key: v5-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'dangerzone/container_helpers/*', 'install/common/build-image.py') }} path: |- - share/container.tar.gz + share/container.tar share/image-id.txt fail-on-cache-miss: true @@ -471,30 +471,3 @@ jobs: # file successfully. 
xvfb-run -s '-ac' ./dev_scripts/env.py --distro ${{ matrix.distro }} --version ${{ matrix.version }} run --dev \ bash -c 'cd dangerzone; poetry run make test' - - check-reproducibility: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install dev. dependencies - run: |- - sudo apt-get update - sudo apt-get install -y git python3-poetry --no-install-recommends - poetry install --only package - - - name: Verify that the Dockerfile matches the commited template and params - run: |- - cp Dockerfile Dockerfile.orig - make Dockerfile - diff Dockerfile.orig Dockerfile - - - name: Build Dangerzone container image - run: | - python3 ./install/common/build-image.py --no-save - - - name: Reproduce the same container image - run: | - ./dev_scripts/reproduce-image.py diff --git a/.github/workflows/release-container-image.yml b/.github/workflows/release-container-image.yml new file mode 100644 index 000000000..df23e896c --- /dev/null +++ b/.github/workflows/release-container-image.yml @@ -0,0 +1,237 @@ +name: Release multi-arch container image + +on: + workflow_dispatch: + push: + branches: + - main + - "test/**" + schedule: + - cron: "0 0 * * *" # Run every day at 00:00 UTC. + +env: + REGISTRY: ghcr.io/${{ github.repository_owner }} + REGISTRY_USER: ${{ github.actor }} + REGISTRY_PASSWORD: ${{ github.token }} + IMAGE_NAME: dangerzone/dangerzone + BUILDKIT_IMAGE: "docker.io/moby/buildkit:v0.19.0@sha256:14aa1b4dd92ea0a4cd03a54d0c6079046ea98cd0c0ae6176bdd7036ba370cbbe" + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install dev. 
dependencies + run: |- + sudo apt-get update + sudo apt-get install -y git python3-poetry --no-install-recommends + poetry install --only package + + - name: Verify that the Dockerfile matches the commited template and params + run: |- + cp Dockerfile Dockerfile.orig + make Dockerfile + diff Dockerfile.orig Dockerfile + + prepare: + runs-on: ubuntu-latest + outputs: + debian_archive_date: ${{ steps.params.outputs.debian_archive_date }} + source_date_epoch: ${{ steps.params.outputs.source_date_epoch }} + image: ${{ steps.params.outputs.full_image_name }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Compute image parameters + id: params + run: | + DEBIAN_ARCHIVE_DATE=$(date -u +'%Y%m%d') + SOURCE_DATE_EPOCH=$(date -u -d ${DEBIAN_ARCHIVE_DATE} +"%s") + TAG=${DEBIAN_ARCHIVE_DATE}-$(git describe --long --first-parent | tail -c +2) + FULL_IMAGE_NAME=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG} + + # NOTE: TAG already starts with the archive date, so it is emitted as-is. + echo "debian_archive_date=${DEBIAN_ARCHIVE_DATE}" >> "$GITHUB_OUTPUT" + echo "source_date_epoch=${SOURCE_DATE_EPOCH}" >> "$GITHUB_OUTPUT" + echo "tag=${TAG}" >> "$GITHUB_OUTPUT" + echo "full_image_name=${FULL_IMAGE_NAME}" >> "$GITHUB_OUTPUT" + + build: + name: Build ${{ matrix.platform.name }} image + runs-on: ubuntu-24.04${{ matrix.platform.suffix }} + needs: + - prepare + strategy: + fail-fast: false + matrix: + platform: + - suffix: "" + name: "linux/amd64" + - suffix: "-arm" + name: "linux/arm64" + steps: + - uses: actions/checkout@v4 + + - name: Prepare + run: | + platform=${{ matrix.platform.name }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Instructions for reproducibly building a container image are taken from: + # https://github.com/freedomofpress/repro-build?tab=readme-ov-file#build-and-push-a-container-image-on-github-actions + - 
name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: image=${{ env.BUILDKIT_IMAGE }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 + with: + context: ./dangerzone/ + file: Dockerfile + build-args: | + DEBIAN_ARCHIVE_DATE=${{ needs.prepare.outputs.debian_archive_date }} + SOURCE_DATE_EPOCH=${{ needs.prepare.outputs.source_date_epoch }} + provenance: false + outputs: type=image,"name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}",push-by-digest=true,push=true,rewrite-timestamp=true,name-canonical=true + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Export digest + run: | + mkdir -p ${{ runner.temp }}/digests + digest="${{ steps.build.outputs.digest }}" + touch "${{ runner.temp }}/digests/${digest#sha256:}" + echo "Image digest is: ${digest}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: ${{ runner.temp }}/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + runs-on: ubuntu-latest + needs: + - prepare + - build + outputs: + digest_root: ${{ steps.image.outputs.digest_root }} + digest_amd64: ${{ steps.image.outputs.digest_amd64 }} + digest_arm64: ${{ steps.image.outputs.digest_arm64 }} + steps: + - uses: actions/checkout@v4 + + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: ${{ runner.temp }}/digests + pattern: digests-* + merge-multiple: true + + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: image=${{ env.BUILDKIT_IMAGE }} + + - name: Create manifest list and push + working-directory: ${{ runner.temp }}/digests + run: | + DIGESTS=$(printf '${{ needs.prepare.outputs.image }}@sha256:%s ' *) + docker buildx imagetools create -t ${{ needs.prepare.outputs.image 
}} ${DIGESTS} + + - name: Inspect image + id: image + run: | + # Inspect the image + docker buildx imagetools inspect ${{ needs.prepare.outputs.image }} + docker buildx imagetools inspect ${{ needs.prepare.outputs.image }} --format "{{json .Manifest}}" > manifest + + # Calculate and print the digests. + # NOTE: Select the per-platform digests by architecture and not by + # position, since the order of the entries in the manifest list follows + # the order of the digest files, not the order of the build matrix. + digest_root=$(jq -r '.digest' manifest) + digest_amd64=$(jq -r '.manifests[] | select(.platform.architecture == "amd64") | .digest' manifest) + digest_arm64=$(jq -r '.manifests[] | select(.platform.architecture == "arm64") | .digest' manifest) + + echo "The image digests are:" + echo " Root: $digest_root" + echo " linux/amd64: $digest_amd64" + echo " linux/arm64: $digest_arm64" + + # NOTE: Set the digests as an output because the `env` context is not + # available to the inputs of a reusable workflow call. + echo "digest_root=$digest_root" >> "$GITHUB_OUTPUT" + echo "digest_amd64=$digest_amd64" >> "$GITHUB_OUTPUT" + echo "digest_arm64=$digest_arm64" >> "$GITHUB_OUTPUT" + + # This step calls the container workflow to generate provenance and push it to + # the container registry. + provenance: + needs: + - prepare + - merge + strategy: + matrix: + digest: + - root + - amd64 + - arm64 + permissions: + actions: read # for detecting the GitHub Actions environment. + id-token: write # for creating OIDC tokens for signing. + packages: write # for uploading attestations. 
+ uses: slsa-framework/slsa-github-generator/.github/workflows/generator_container_slsa3.yml@v2.0.0 + with: + digest: ${{ needs.merge.outputs[format('digest_{0}', matrix.digest)] }} + image: ${{ needs.prepare.outputs.image }} + registry-username: ${{ github.actor }} + secrets: + registry-password: ${{ secrets.GITHUB_TOKEN }} + + # This step ensures that the image is reproducible + check-reproducibility: + needs: + - prepare + - merge + runs-on: ubuntu-24.04${{ matrix.platform.suffix }} + strategy: + fail-fast: false + matrix: + platform: + - suffix: "" + name: "amd64" + - suffix: "-arm" + name: "arm64" + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Reproduce the same container image + run: | + ./dev_scripts/reproduce-image.py \ + --runtime \ + docker \ + --debian-archive-date \ + ${{ needs.prepare.outputs.debian_archive_date }} \ + --platform \ + linux/${{ matrix.platform.name }} \ + ${{ needs.merge.outputs[format('digest_{0}', matrix.platform.name)] }} diff --git a/.github/workflows/scan.yml b/.github/workflows/scan.yml index e08dcef12..dea72a321 100644 --- a/.github/workflows/scan.yml +++ b/.github/workflows/scan.yml @@ -16,19 +16,12 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Install container build dependencies - run: | - sudo apt install pipx - pipx install poetry - pipx inject poetry poetry-plugin-export - poetry install --only package - - name: Bump date of Debian snapshot archive - run: | - date=$(date "+%Y%m%d") - sed -i "s/DEBIAN_ARCHIVE_DATE=[0-9]\+/DEBIAN_ARCHIVE_DATE=${date}/" Dockerfile.env - make Dockerfile - name: Build container image - run: python3 ./install/common/build-image.py --runtime docker --no-save + run: | + python3 ./install/common/build-image.py \ + --debian-archive-date $(date "+%Y%m%d") \ + --runtime docker + docker load -i share/container.tar - name: Get image tag id: tag run: echo "tag=$(cat share/image-id.txt)" >> $GITHUB_OUTPUT diff --git a/Dockerfile b/Dockerfile index 
62f56f8e8..ca36c6e34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,14 +2,14 @@ # Dockerfile args below. For more info about this file, read # docs/developer/reproducibility.md. -ARG DEBIAN_IMAGE_DATE=20250113 +ARG DEBIAN_IMAGE_DATE=20250224 -FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as dangerzone-image +FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim AS dangerzone-image -ARG GVISOR_ARCHIVE_DATE=20250120 -ARG DEBIAN_ARCHIVE_DATE=20250127 -ARG H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132 -ARG H2ORESTART_VERSION=v0.7.0 +ARG GVISOR_ARCHIVE_DATE=20250217 +ARG DEBIAN_ARCHIVE_DATE=20250226 +ARG H2ORESTART_CHECKSUM=452331f8603ef456264bd72db6fa8a11ca72b392019a8135c0b2f3095037d7b1 +ARG H2ORESTART_VERSION=v0.7.1 ENV DEBIAN_FRONTEND=noninteractive @@ -22,8 +22,8 @@ RUN \ --mount=type=bind,source=./container_helpers/repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \ --mount=type=bind,source=./container_helpers/gvisor.key,target=/tmp/gvisor.key \ : "Hacky way to set a date for the Debian snapshot repos" && \ - touch -d ${DEBIAN_ARCHIVE_DATE} /etc/apt/sources.list.d/debian.sources && \ - touch -d ${DEBIAN_ARCHIVE_DATE} /etc/apt/sources.list && \ + touch -d ${DEBIAN_ARCHIVE_DATE}Z /etc/apt/sources.list.d/debian.sources && \ + touch -d ${DEBIAN_ARCHIVE_DATE}Z /etc/apt/sources.list && \ repro-sources-list.sh && \ : "Setup APT to install gVisor from its separate APT repo" && \ apt-get update && \ @@ -52,9 +52,13 @@ RUN mkdir /opt/libreoffice_ext && cd /opt/libreoffice_ext \ && rm /root/.wget-hsts # Create an unprivileged user both for gVisor and for running Dangerzone. +# XXX: Make the shadow field "date of last password change" a constant +# number. 
RUN addgroup --gid 1000 dangerzone RUN adduser --uid 1000 --ingroup dangerzone --shell /bin/true \ - --disabled-password --home /home/dangerzone dangerzone + --disabled-password --home /home/dangerzone dangerzone \ + && chage -d 99999 dangerzone \ + && rm /etc/shadow- # Copy Dangerzone's conversion logic under /opt/dangerzone, and allow Python to # import it. @@ -165,20 +169,47 @@ RUN mkdir /home/dangerzone/.containers # The `ln` binary, even if you specify it by its full path, cannot run # (probably because `ld-linux.so` can't be found). For this reason, we have # to create the symlinks beforehand, in a previous build stage. Then, in an -# empty contianer image (scratch images), we can copy these symlinks and the -# /usr, and stich everything together. +# empty container image (scratch images), we can copy these symlinks and the +# /usr, and stitch everything together. ############################################################################### # Create the filesystem hierarchy that will be used to symlink /usr. -RUN mkdir /new_root -RUN mkdir /new_root/root /new_root/run /new_root/tmp -RUN chmod 777 /new_root/tmp +RUN mkdir -p \ + /new_root \ + /new_root/root \ + /new_root/run \ + /new_root/tmp \ + /new_root/home/dangerzone/dangerzone-image/rootfs + +# XXX: Remove /etc/resolv.conf, so that the network configuration of the host +# does not leak. 
+RUN cp -r /etc /var /new_root/ \ + && rm /new_root/etc/resolv.conf +RUN cp -r /etc /opt /usr /new_root/home/dangerzone/dangerzone-image/rootfs \ + && rm /new_root/home/dangerzone/dangerzone-image/rootfs/etc/resolv.conf + RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr RUN ln -s usr/bin /new_root/bin RUN ln -s usr/lib /new_root/lib RUN ln -s usr/lib64 /new_root/lib64 RUN ln -s usr/sbin /new_root/sbin +RUN ln -s usr/bin /new_root/home/dangerzone/dangerzone-image/rootfs/bin +RUN ln -s usr/lib /new_root/home/dangerzone/dangerzone-image/rootfs/lib +RUN ln -s usr/lib64 /new_root/home/dangerzone/dangerzone-image/rootfs/lib64 + +# Fix permissions in /home/dangerzone, so that our entrypoint script can make +# changes in the following folders. +RUN chown dangerzone:dangerzone \ + /new_root/home/dangerzone \ + /new_root/home/dangerzone/dangerzone-image/ +# Fix permissions in /tmp, so that it can be used by unprivileged users. +RUN chmod 777 /new_root/tmp + +COPY container_helpers/entrypoint.py /new_root +# HACK: For reasons that we are not sure yet, we need to explicitly specify the +# modification time of this file. +RUN touch -d ${DEBIAN_ARCHIVE_DATE}Z /new_root/entrypoint.py ## Final image @@ -188,24 +219,7 @@ FROM scratch # /usr can be a symlink. COPY --from=dangerzone-image /new_root/ / -# Copy the bare minimum to run Dangerzone in the inner container image. -COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/ -COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/ -COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ -RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin -RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib -RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64 - -# Copy the bare minimum to let the security scanner find vulnerabilities. 
-COPY --from=dangerzone-image /etc/ /etc/ -COPY --from=dangerzone-image /var/ /var/ - -# Allow our entrypoint script to make changes in the following folders. -RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/ - # Switch to the dangerzone user for the rest of the script. USER dangerzone -COPY container_helpers/entrypoint.py / - ENTRYPOINT ["/entrypoint.py"] diff --git a/Dockerfile.env b/Dockerfile.env index 2ab94bd9e..4b98bf935 100644 --- a/Dockerfile.env +++ b/Dockerfile.env @@ -1,9 +1,9 @@ # Can be bumped to the latest date in https://hub.docker.com/_/debian/tags?name=bookworm- -DEBIAN_IMAGE_DATE=20250113 +DEBIAN_IMAGE_DATE=20250224 # Can be bumped to today's date -DEBIAN_ARCHIVE_DATE=20250127 +DEBIAN_ARCHIVE_DATE=20250226 # Can be bumped to the latest date in https://github.com/google/gvisor/tags -GVISOR_ARCHIVE_DATE=20250120 +GVISOR_ARCHIVE_DATE=20250217 # Can be bumped to the latest version and checksum from https://github.com/ebandal/H2Orestart/releases -H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132 -H2ORESTART_VERSION=v0.7.0 +H2ORESTART_CHECKSUM=452331f8603ef456264bd72db6fa8a11ca72b392019a8135c0b2f3095037d7b1 +H2ORESTART_VERSION=v0.7.1 diff --git a/Dockerfile.in b/Dockerfile.in index af03c8924..ebc87ca0a 100644 --- a/Dockerfile.in +++ b/Dockerfile.in @@ -4,7 +4,7 @@ ARG DEBIAN_IMAGE_DATE={{DEBIAN_IMAGE_DATE}} -FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as dangerzone-image +FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim AS dangerzone-image ARG GVISOR_ARCHIVE_DATE={{GVISOR_ARCHIVE_DATE}} ARG DEBIAN_ARCHIVE_DATE={{DEBIAN_ARCHIVE_DATE}} @@ -22,8 +22,8 @@ RUN \ --mount=type=bind,source=./container_helpers/repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \ --mount=type=bind,source=./container_helpers/gvisor.key,target=/tmp/gvisor.key \ : "Hacky way to set a date for the Debian snapshot repos" && \ - touch -d ${DEBIAN_ARCHIVE_DATE} /etc/apt/sources.list.d/debian.sources && 
\ - touch -d ${DEBIAN_ARCHIVE_DATE} /etc/apt/sources.list && \ + touch -d ${DEBIAN_ARCHIVE_DATE}Z /etc/apt/sources.list.d/debian.sources && \ + touch -d ${DEBIAN_ARCHIVE_DATE}Z /etc/apt/sources.list && \ repro-sources-list.sh && \ : "Setup APT to install gVisor from its separate APT repo" && \ apt-get update && \ @@ -52,9 +52,13 @@ RUN mkdir /opt/libreoffice_ext && cd /opt/libreoffice_ext \ && rm /root/.wget-hsts # Create an unprivileged user both for gVisor and for running Dangerzone. +# XXX: Make the shadow field "date of last password change" a constant +# number. RUN addgroup --gid 1000 dangerzone RUN adduser --uid 1000 --ingroup dangerzone --shell /bin/true \ - --disabled-password --home /home/dangerzone dangerzone + --disabled-password --home /home/dangerzone dangerzone \ + && chage -d 99999 dangerzone \ + && rm /etc/shadow- # Copy Dangerzone's conversion logic under /opt/dangerzone, and allow Python to # import it. @@ -165,20 +169,47 @@ RUN mkdir /home/dangerzone/.containers # The `ln` binary, even if you specify it by its full path, cannot run # (probably because `ld-linux.so` can't be found). For this reason, we have # to create the symlinks beforehand, in a previous build stage. Then, in an -# empty contianer image (scratch images), we can copy these symlinks and the -# /usr, and stich everything together. +# empty container image (scratch images), we can copy these symlinks and the +# /usr, and stitch everything together. ############################################################################### # Create the filesystem hierarchy that will be used to symlink /usr. -RUN mkdir /new_root -RUN mkdir /new_root/root /new_root/run /new_root/tmp -RUN chmod 777 /new_root/tmp +RUN mkdir -p \ + /new_root \ + /new_root/root \ + /new_root/run \ + /new_root/tmp \ + /new_root/home/dangerzone/dangerzone-image/rootfs + +# XXX: Remove /etc/resolv.conf, so that the network configuration of the host +# does not leak. 
+RUN cp -r /etc /var /new_root/ \ + && rm /new_root/etc/resolv.conf +RUN cp -r /etc /opt /usr /new_root/home/dangerzone/dangerzone-image/rootfs \ + && rm /new_root/home/dangerzone/dangerzone-image/rootfs/etc/resolv.conf + RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr RUN ln -s usr/bin /new_root/bin RUN ln -s usr/lib /new_root/lib RUN ln -s usr/lib64 /new_root/lib64 RUN ln -s usr/sbin /new_root/sbin +RUN ln -s usr/bin /new_root/home/dangerzone/dangerzone-image/rootfs/bin +RUN ln -s usr/lib /new_root/home/dangerzone/dangerzone-image/rootfs/lib +RUN ln -s usr/lib64 /new_root/home/dangerzone/dangerzone-image/rootfs/lib64 + +# Fix permissions in /home/dangerzone, so that our entrypoint script can make +# changes in the following folders. +RUN chown dangerzone:dangerzone \ + /new_root/home/dangerzone \ + /new_root/home/dangerzone/dangerzone-image/ +# Fix permissions in /tmp, so that it can be used by unprivileged users. +RUN chmod 777 /new_root/tmp + +COPY container_helpers/entrypoint.py /new_root +# HACK: For reasons that we are not sure yet, we need to explicitly specify the +# modification time of this file. +RUN touch -d ${DEBIAN_ARCHIVE_DATE}Z /new_root/entrypoint.py ## Final image @@ -188,24 +219,7 @@ FROM scratch # /usr can be a symlink. COPY --from=dangerzone-image /new_root/ / -# Copy the bare minimum to run Dangerzone in the inner container image. -COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/ -COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/ -COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ -RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin -RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib -RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64 - -# Copy the bare minimum to let the security scanner find vulnerabilities. 
-COPY --from=dangerzone-image /etc/ /etc/ -COPY --from=dangerzone-image /var/ /var/ - -# Allow our entrypoint script to make changes in the following folders. -RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/ - # Switch to the dangerzone user for the rest of the script. USER dangerzone -COPY container_helpers/entrypoint.py / - ENTRYPOINT ["/entrypoint.py"] diff --git a/INSTALL.md b/INSTALL.md index a8e5d2e9f..a7ee13552 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -297,7 +297,7 @@ Our [GitHub Releases page](https://github.com/freedomofpress/dangerzone/releases hosts the following files: * Windows installer (`Dangerzone-.msi`) * macOS archives (`Dangerzone--.dmg`) -* Container images (`container--.tar.gz`) +* Container images (`container--.tar`) * Source package (`dangerzone-.tar.gz`) All these files are accompanied by signatures (as `.asc` files). We'll explain @@ -325,7 +325,7 @@ gpg --verify Dangerzone-0.6.1-i686.dmg.asc Dangerzone-0.6.1-i686.dmg For the container images: ``` -gpg --verify container-0.6.1-i686.tar.gz.asc container-0.6.1-i686.tar.gz +gpg --verify container-0.6.1-i686.tar.asc container-0.6.1-i686.tar ``` For the source package: diff --git a/RELEASE.md b/RELEASE.md index b2b490bf6..cbdc68174 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -149,7 +149,7 @@ Here is what you need to do: poetry run ./install/common/download-tessdata.py # Copy the container image to the assets folder - cp share/container.tar.gz ~dz/release-assets/$VERSION/dangerzone-$VERSION-arm64.tar.gz + cp share/container.tar ~dz/release-assets/$VERSION/dangerzone-$VERSION-arm64.tar cp share/image-id.txt ~dz/release-assets/$VERSION/. ``` @@ -226,7 +226,7 @@ The Windows release is performed in a Windows 11 virtual machine (as opposed to - [ ] Copy the container image into the VM > [!IMPORTANT] - > Instead of running `python .\install\windows\build-image.py` in the VM, run the build image script on the host (making sure to build for `linux/amd64`). 
Copy `share/container.tar.gz` and `share/image-id.txt` from the host into the `share` folder in the VM. + > Instead of running `python .\install\windows\build-image.py` in the VM, run the build image script on the host (making sure to build for `linux/amd64`). Copy `share/container.tar` and `share/image-id.txt` from the host into the `share` folder in the VM. - [ ] Run `poetry run .\install\windows\build-app.bat` - [ ] When you're done you will have `dist\Dangerzone.msi` @@ -317,9 +317,8 @@ To publish the release, you can follow these steps: - [ ] Run container scan on the produced container images (some time may have passed since the artifacts were built) ```bash - gunzip --keep -c ./share/container.tar.gz > /tmp/container.tar docker pull anchore/grype:latest - docker run --rm -v /tmp/container.tar:/container.tar anchore/grype:latest /container.tar + docker run --rm -v ./share/container.tar:/container.tar anchore/grype:latest /container.tar ``` - [ ] Collect the assets in a single directory, calculate their SHA-256 hashes, and sign them. 
diff --git a/dangerzone/container_utils.py b/dangerzone/container_utils.py index 99c9a0803..ee528d0dd 100644 --- a/dangerzone/container_utils.py +++ b/dangerzone/container_utils.py @@ -1,4 +1,3 @@ -import gzip import logging import platform import shutil @@ -96,18 +95,26 @@ def list_image_tags() -> List[str]: ) +def add_image_tag(image_id: str, new_tag: str) -> None: + """Add a tag to the Dangerzone image.""" + log.debug(f"Adding tag '{new_tag}' to image '{image_id}'") + subprocess.check_output( + [get_runtime(), "tag", image_id, new_tag], + startupinfo=get_subprocess_startupinfo(), + ) + + def delete_image_tag(tag: str) -> None: """Delete a Dangerzone image tag.""" - name = CONTAINER_NAME + ":" + tag - log.warning(f"Deleting old container image: {name}") + log.warning(f"Deleting old container image: {tag}") try: subprocess.check_output( - [get_runtime(), "rmi", "--force", name], + [get_runtime(), "rmi", "--force", tag], startupinfo=get_subprocess_startupinfo(), ) except Exception as e: log.warning( - f"Couldn't delete old container image '{name}', so leaving it there." + f"Couldn't delete old container image '{tag}', so leaving it there." 
f" Original error: {e}" ) @@ -120,30 +127,46 @@ def get_expected_tag() -> str: def load_image_tarball() -> None: log.info("Installing Dangerzone container image...") - p = subprocess.Popen( - [get_runtime(), "load"], - stdin=subprocess.PIPE, - startupinfo=get_subprocess_startupinfo(), - ) - - chunk_size = 4 << 20 - compressed_container_path = get_resource_path("container.tar.gz") - with gzip.open(compressed_container_path) as f: - while True: - chunk = f.read(chunk_size) - if len(chunk) > 0: - if p.stdin: - p.stdin.write(chunk) + tarball_path = get_resource_path("container.tar") + with open(tarball_path) as f: + try: + res = subprocess.run( + [get_runtime(), "load"], + stdin=f, + startupinfo=get_subprocess_startupinfo(), + capture_output=True, + check=True, + ) + except subprocess.CalledProcessError as e: + if e.stderr: + error = e.stderr.decode() else: - break - _, err = p.communicate() - if p.returncode < 0: - if err: - error = err.decode() - else: - error = "No output" - raise errors.ImageInstallationException( - f"Could not install container image: {error}" + error = "No output" + raise errors.ImageInstallationException( + f"Could not install container image: {error}" + ) + + # Loading an image built with Buildkit in Podman 3.4 messes up its name. The tag + # somehow becomes the name of the loaded image [1]. + # + # We know that older Podman versions are not generally affected, since Podman v3.0.1 + # on Debian Bullseye works properly. Also, Podman v4.0 is not affected, so it makes + # sense to target only Podman v3.4 for a fix. + # + # The fix is simple, tag the image properly based on the expected tag from + # `share/image-id.txt` and delete the incorrect tag. 
+ # + # [1] https://github.com/containers/podman/issues/16490 + if get_runtime_name() == "podman" and get_runtime_version() == (3, 4): + expected_tag = get_expected_tag() + bad_tag = f"localhost/{expected_tag}:latest" + good_tag = f"{CONTAINER_NAME}:{expected_tag}" + + log.debug( + f"Dangerzone images loaded in Podman v3.4 usually have an invalid tag." + " Fixing it..." ) + add_image_tag(bad_tag, good_tag) + delete_image_tag(bad_tag) - log.info("Successfully installed container image from") + log.info("Successfully installed container image") diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 0213cde9c..1cd80ccea 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -97,6 +97,7 @@ def install() -> bool: f"Could not find a Dangerzone container image with tag '{expected_tag}'" ) for tag in old_tags: + tag = container_utils.CONTAINER_NAME + ":" + tag container_utils.delete_image_tag(tag) else: return True diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index 02f80029b..a7e36b576 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -130,7 +130,7 @@ def is_qubes_native_conversion() -> bool: # This disambiguates if it is running a Qubes targetted build or not # (Qubes-specific builds don't ship the container image) - compressed_container_path = get_resource_path("container.tar.gz") + compressed_container_path = get_resource_path("container.tar") return not os.path.exists(compressed_container_path) else: return False diff --git a/dev_scripts/repro-build b/dev_scripts/repro-build new file mode 100755 index 000000000..d8b861df6 --- /dev/null +++ b/dev_scripts/repro-build @@ -0,0 +1,665 @@ +#!/usr/bin/env python3 + +import argparse +import datetime +import hashlib +import json +import logging +import os +import pprint +import shlex +import shutil +import subprocess +import sys 
+import tarfile +from pathlib import Path + +logger = logging.getLogger(__name__) + +MEDIA_TYPE_INDEX_V1_JSON = "application/vnd.oci.image.index.v1+json" +MEDIA_TYPE_MANIFEST_V1_JSON = "application/vnd.oci.image.manifest.v1+json" + +ENV_RUNTIME = "REPRO_RUNTIME" +ENV_DATETIME = "REPRO_DATETIME" +ENV_SDE = "REPRO_SOURCE_DATE_EPOCH" +ENV_CACHE = "REPRO_CACHE" +ENV_BUILDKIT = "REPRO_BUILDKIT_IMAGE" +ENV_ROOTLESS = "REPRO_ROOTLESS" + +DEFAULT_BUILDKIT_IMAGE = "moby/buildkit:v0.19.0@sha256:14aa1b4dd92ea0a4cd03a54d0c6079046ea98cd0c0ae6176bdd7036ba370cbbe" +DEFAULT_BUILDKIT_IMAGE_ROOTLESS = "moby/buildkit:v0.19.0-rootless@sha256:e901cffdad753892a7c3afb8b9972549fca02c73888cf340c91ed801fdd96d71" + +MSG_BUILD_CTX = """Build environment: +- Container runtime: {runtime} +- BuildKit image: {buildkit_image} +- Rootless support: {rootless} +- Caching enabled: {use_cache} +- Build context: {context} +- Dockerfile: {dockerfile} +- Output: {output} + +Build parameters: +- SOURCE_DATE_EPOCH: {sde} +- Build args: {build_args} +- Tag: {tag} +- Platform: {platform} + +Podman-only arguments: +- BuildKit arguments: {buildkit_args} + +Docker-only arguments: +- Docker Buildx arguments: {buildx_args} +""" + + +def pretty_error(obj: dict, msg: str): + """Raise an exception whose message is `msg` followed by a pretty-printed `obj`.""" + raise Exception(f"{msg}\n{pprint.pformat(obj)}") + + +def get_key(obj: dict, key: str) -> object: + """Return `obj[key]`, raising a descriptive error if the key is missing.""" + if key not in obj: + pretty_error(obj, f"Could not find key '{key}' in the dictionary:") + return obj[key] + + +def run(cmd, dry=False, check=True): + action = "Would have run" if dry else "Running" + logger.debug(f"{action}: {shlex.join(cmd)}") + if not dry: + subprocess.run(cmd, check=check) + + +def snip_contents(contents: str, num: int) -> str: + contents = contents.replace("\n", "") + if len(contents) > num: + return ( + contents[:num] + + f" [... {len(contents) - num} characters omitted." 
+ + " Pass --show-contents to print them in their entirety]" + ) + return contents + + +def detect_container_runtime() -> str: + """Auto-detect the installed container runtime in the system.""" + if shutil.which("docker"): + return "docker" + elif shutil.which("podman"): + return "podman" + else: + return None + + +def parse_runtime(args) -> str: + if args.runtime is not None: + return args.runtime + + runtime = os.environ.get(ENV_RUNTIME) + if runtime is None: + raise RuntimeError("No container runtime detected in your system") + if runtime not in ("docker", "podman"): + raise RuntimeError( + "Only 'docker' or 'podman' container runtimes" + " are currently supported by this script" + ) + + +def parse_use_cache(args) -> bool: + if args.no_cache: + return False + return bool(int(os.environ.get(ENV_CACHE, "1"))) + + +def parse_rootless(args, runtime: str) -> bool: + rootless = args.rootless or bool(int(os.environ.get(ENV_ROOTLESS, "0"))) + if runtime != "podman" and rootless: + raise RuntimeError("Rootless mode is only supported with Podman runtime") + return rootless + + +def parse_sde(args) -> str: + sde = os.environ.get(ENV_SDE, args.source_date_epoch) + dt = os.environ.get(ENV_DATETIME, args.datetime) + + if (sde is not None and dt is not None) or (sde is None and dt is None): + raise RuntimeError("You need to pass either a source date epoch or a datetime") + + if sde is not None: + return str(sde) + + if dt is not None: + d = datetime.datetime.fromisoformat(dt) + # If the datetime is naive, assume its timezone is UTC. 
The check is + # taken from: + # https://docs.python.org/3/library/datetime.html#determining-if-an-object-is-aware-or-naive + if d.tzinfo is None or d.tzinfo.utcoffset(d) is None: + d = d.replace(tzinfo=datetime.timezone.utc) + return int(d.timestamp()) + + +def parse_buildkit_image(args, rootless: bool, runtime: str) -> str: + default = DEFAULT_BUILDKIT_IMAGE_ROOTLESS if rootless else DEFAULT_BUILDKIT_IMAGE + img = args.buildkit_image or os.environ.get(ENV_BUILDKIT, default) + + if runtime == "podman" and not img.startswith("docker.io/"): + img = "docker.io/" + img + + return img + + +def parse_build_args(args) -> str: + return args.build_arg or [] + + +def parse_buildkit_args(args, runtime: str) -> str: + if not args.buildkit_args: + return [] + + if runtime != "podman": + raise RuntimeError("Cannot specify BuildKit arguments using the Podman runtime") + + return shlex.split(args.buildkit_args) + + +def parse_buildx_args(args, runtime: str) -> str: + if not args.buildx_args: + return [] + + if runtime != "docker": + raise RuntimeError( + "Cannot specify Docker Buildx arguments using the Podman runtime" + ) + + return shlex.split(args.buildx_args) + + +def parse_image_digest(args) -> str | None: + if not args.expected_image_digest: + return None + parsed = args.expected_image_digest.split(":", 1) + if len(parsed) == 1: + return parsed[0] + else: + return parsed[1] + + +def parse_path(path: str | None) -> str | None: + return path and str(Path(path).absolute()) + + +########################## +# OCI parsing logic +# +# Compatible with: +# * https://github.com/opencontainers/image-spec/blob/main/image-layout.md + + +def oci_print_info(parsed: dict, full: bool) -> None: + print(f"The OCI tarball contains an index and {len(parsed) - 1} manifest(s):") + print() + print(f"Image digest: {parsed[1]['digest']}") + for i, info in enumerate(parsed): + print() + if i == 0: + print(f"Index ({info['path']}):") + else: + print(f"Manifest {i} ({info['path']}):") + print(f" 
Digest: {info['digest']}") + print(f" Media type: {info['media_type']}") + print(f" Platform: {info['platform'] or '-'}") + contents = info["contents"] if full else snip_contents(info["contents"], 600) + print(f" Contents: {contents}") + print() + + +def oci_normalize_path(path): + if path.startswith("sha256:"): + hash_algo, checksum = path.split(":") + path = f"blobs/{hash_algo}/{checksum}" + return path + + +def oci_get_file_from_tarball(tar: tarfile.TarFile, path: str) -> dict: + return + + +def oci_parse_manifest(tar: tarfile.TarFile, path: str, platform: dict | None) -> dict: + """Parse manifest information in JSON format. + + Interestingly, the platform info for a manifest is not included in the + manifest itself, but in the descriptor that points to it. So, we have to + carry it from the previous manifest and include in the info here. + """ + path = oci_normalize_path(path) + contents = tar.extractfile(path).read().decode() + digest = "sha256:" + hashlib.sha256(contents.encode()).hexdigest() + contents_dict = json.loads(contents) + media_type = get_key(contents_dict, "mediaType") + manifests = contents_dict.get("manifests", []) + + if platform: + os = get_key(platform, "os") + arch = get_key(platform, "architecture") + platform = f"{os}/{arch}" + + return { + "path": path, + "contents": contents, + "digest": digest, + "media_type": media_type, + "platform": platform, + "manifests": manifests, + } + + +def oci_parse_manifests_dfs( + tar: tarfile.TarFile, path: str, parsed: list, platform: dict | None = None +) -> None: + info = oci_parse_manifest(tar, path, platform) + parsed.append(info) + for m in info["manifests"]: + oci_parse_manifests_dfs(tar, m["digest"], parsed, m.get("platform")) + + +def oci_parse_tarball(path: Path) -> dict: + parsed = [] + with tarfile.TarFile.open(path) as tar: + oci_parse_manifests_dfs(tar, "index.json", parsed) + return parsed + + +########################## +# Image building logic + + +def podman_build( + context: str, + 
dockerfile: str | None, + tag: str | None, + buildkit_image: str, + sde: int, + rootless: bool, + use_cache: bool, + output: Path, + build_args: list, + platform: str, + buildkit_args: list, + dry: bool, +): + rootless_args = [] + rootful_args = [] + if rootless: + rootless_args = [ + "--userns", + "keep-id:uid=1000,gid=1000", + "--security-opt", + "seccomp=unconfined", + "--security-opt", + "apparmor=unconfined", + "-e", + "BUILDKITD_FLAGS=--oci-worker-no-process-sandbox", + ] + else: + rootful_args = ["--privileged"] + + dockerfile_args_podman = [] + dockerfile_args_buildkit = [] + if dockerfile: + dockerfile_args_podman = ["-v", f"{dockerfile}:/tmp/Dockerfile"] + dockerfile_args_buildkit = ["--local", "dockerfile=/tmp"] + else: + dockerfile_args_buildkit = ["--local", "dockerfile=/tmp/work"] + + tag_args = f",name={tag}" if tag else "" + + cache_args = [] + if use_cache: + cache_args = [ + "--export-cache", + "type=local,mode=max,dest=/tmp/cache", + "--import-cache", + "type=local,src=/tmp/cache", + ] + + _build_args = [] + for arg in build_args: + _build_args.append("--opt") + _build_args.append(f"build-arg:{arg}") + platform_args = ["--opt", f"platform={platform}"] if platform else [] + + cmd = [ + "podman", + "run", + "-it", + "--rm", + "-v", + "buildkit_cache:/tmp/cache", + "-v", + f"{output.parent}:/tmp/image", + "-v", + f"{context}:/tmp/work", + "--entrypoint", + "buildctl-daemonless.sh", + *rootless_args, + *rootful_args, + *dockerfile_args_podman, + buildkit_image, + "build", + "--frontend", + "dockerfile.v0", + "--local", + "context=/tmp/work", + "--opt", + f"build-arg:SOURCE_DATE_EPOCH={sde}", + *_build_args, + "--output", + f"type=docker,dest=/tmp/image/{output.name},rewrite-timestamp=true{tag_args}", + *cache_args, + *dockerfile_args_buildkit, + *platform_args, + *buildkit_args, + ] + + run(cmd, dry) + + +def docker_build( + context: str, + dockerfile: str | None, + tag: str | None, + buildkit_image: str, + sde: int, + use_cache: bool, + output: 
Path, + build_args: list, + platform: str, + buildx_args: list, + dry: bool, +): + builder_id = hashlib.sha256(buildkit_image.encode()).hexdigest() + builder_name = f"repro-build-{builder_id}" + tag_args = ["-t", tag] if tag else [] + cache_args = [] if use_cache else ["--no-cache", "--pull"] + + cmd = [ + "docker", + "buildx", + "create", + "--name", + builder_name, + "--driver-opt", + f"image={buildkit_image}", + ] + run(cmd, dry, check=False) + + dockerfile_args = ["-f", dockerfile] if dockerfile else [] + _build_args = [] + for arg in build_args: + _build_args.append("--build-arg") + _build_args.append(arg) + platform_args = ["--platform", platform] if platform else [] + + cmd = [ + "docker", + "buildx", + "--builder", + builder_name, + "build", + "--build-arg", + f"SOURCE_DATE_EPOCH={sde}", + *_build_args, + "--provenance", + "false", + "--output", + f"type=docker,dest={output},rewrite-timestamp=true", + *cache_args, + *tag_args, + *dockerfile_args, + *platform_args, + *buildx_args, + context, + ] + run(cmd, dry) + + +########################## +# Command logic + + +def build(args): + runtime = parse_runtime(args) + use_cache = parse_use_cache(args) + sde = parse_sde(args) + rootless = parse_rootless(args, runtime) + buildkit_image = parse_buildkit_image(args, rootless, runtime) + build_args = parse_build_args(args) + platform = args.platform + buildkit_args = parse_buildkit_args(args, runtime) + buildx_args = parse_buildx_args(args, runtime) + tag = args.tag + dockerfile = parse_path(args.file) + output = Path(parse_path(args.output)) + dry = args.dry + context = parse_path(args.context) + + logger.info( + MSG_BUILD_CTX.format( + runtime=runtime, + buildkit_image=buildkit_image, + sde=sde, + rootless=rootless, + use_cache=use_cache, + context=context, + dockerfile=dockerfile or "(not provided)", + tag=tag or "(not provided)", + output=output, + build_args=",".join(build_args) or "(not provided)", + platform=platform or "(default)", + buildkit_args=" 
".join(buildkit_args) or "(not provided)", + buildx_args=" ".join(buildx_args) or "(not provided)", + ) + ) + + try: + if runtime == "docker": + docker_build( + context, + dockerfile, + tag, + buildkit_image, + sde, + use_cache, + output, + build_args, + platform, + buildx_args, + dry, + ) + else: + podman_build( + context, + dockerfile, + tag, + buildkit_image, + sde, + rootless, + use_cache, + output, + build_args, + platform, + buildkit_args, + dry, + ) + except subprocess.CalledProcessError as e: + logger.error(f"Failed with {e.returncode}") + sys.exit(e.returncode) + + +def analyze(args) -> None: + expected_image_digest = parse_image_digest(args) + tarball_path = Path(args.tarball) + + parsed = oci_parse_tarball(tarball_path) + oci_print_info(parsed, args.show_contents) + + if expected_image_digest: + cur_digest = parsed[1]["digest"].split(":")[1] + if cur_digest != expected_image_digest: + raise Exception( + f"The image does not have the expected digest: {cur_digest} != {expected_image_digest}" + ) + print(f"✅ Image digest matches {expected_image_digest}") + + +def define_build_cmd_args(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--runtime", + choices=["docker", "podman"], + default=detect_container_runtime(), + help="The container runtime for building the image (default: %(default)s)", + ) + parser.add_argument( + "--datetime", + metavar="YYYY-MM-DD", + default=None, + help=( + "Provide a date and (optionally) a time in ISO format, which will" + " be used as the timestamp of the image layers" + ), + ) + parser.add_argument( + "--buildkit-image", + metavar="NAME:TAG@DIGEST", + default=None, + help=( + "The BuildKit container image which will be used for building the" + " reproducible container image. 
def parse_args() -> argparse.Namespace:
    """Parse the command line of the script.

    Two sub-commands are defined, `build` and `analyze`. Each stores its
    handler in the `func` attribute of the returned namespace. A sub-command
    is mandatory: without one, argparse prints a usage error and exits.
    """
    parser = argparse.ArgumentParser()
    # `required=True` makes argparse reject a command line with no
    # sub-command, so the namespace always carries a `func` and the
    # arguments of that sub-command.
    subparsers = parser.add_subparsers(
        dest="command", required=True, help="Available commands"
    )

    build_parser = subparsers.add_parser("build", help="Perform a build operation")
    build_parser.set_defaults(func=build)
    define_build_cmd_args(build_parser)

    analyze_parser = subparsers.add_parser("analyze", help="Analyze an OCI tarball")
    analyze_parser.set_defaults(func=analyze)
    analyze_parser.add_argument(
        "tarball",
        metavar="FILE",
        help="Path to OCI image in .tar format",
    )
    analyze_parser.add_argument(
        "--expected-image-digest",
        metavar="DIGEST",
        default=None,
        help="The expected digest for the provided image",
    )
    analyze_parser.add_argument(
        "--show-contents",
        default=False,
        action="store_true",
        help="Show full file contents",
    )

    return parser.parse_args()


def main() -> None:
    """Configure logging, parse the command line and run the chosen command."""
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    args = parse_args()
    # parse_args() guarantees that a sub-command was selected, so `func` is
    # set. The earlier fallback to `build` received a namespace with none of
    # build's arguments and could only fail with an AttributeError.
    args.func(args)
that runs a command and checks the result.""" logger.debug(f"Running command: {' '.join(args)}") - return subprocess.run( - args, - check=True, - stdout=subprocess.PIPE, - ).stdout + return subprocess.run(args, check=True) -def git_commit_get(): - return run("git", "rev-parse", "--short", "HEAD").decode().strip() - - -def git_determine_tag(): - return run("git", "describe", "--long", "--first-parent").decode().strip()[1:] - - -def git_verify(commit, source): - if not commit in source: - raise RuntimeError( - f"Image '{source}' does not seem to be built from commit '{commit}'" - ) - - -def diffoci_hash_matches(diffoci): - """Check if the hash of the downloaded diffoci bin matches the expected one.""" - m = hashlib.sha256() - m.update(diffoci) - diffoci_checksum = m.hexdigest() - return diffoci_checksum == DIFFOCI_CHECKSUM - - -def diffoci_is_installed(): - """Determine if diffoci has been installed. - - Determine if diffoci has been installed, by checking if the binary exists, and if - its hash is the expected one. If the binary exists but the hash is different, then - this is a sign that we need to update the local diffoci binary. 
- """ - if not DIFFOCI_PATH.exists(): - return False - return diffoci_hash_matches(DIFFOCI_PATH.open("rb").read()) - - -def diffoci_download(): - """Download the diffoci tool, based on a URL and its checksum.""" - with urllib.request.urlopen(DIFFOCI_URL) as f: - diffoci_bin = f.read() - - if not diffoci_hash_matches(diffoci_bin): - raise ValueError( - "Unexpected checksum for downloaded diffoci binary:" - f" {diffoci_checksum} !={DIFFOCI_CHECKSUM}" - ) - - DIFFOCI_PATH.parent.mkdir(parents=True, exist_ok=True) - DIFFOCI_PATH.open("wb+").write(diffoci_bin) - DIFFOCI_PATH.chmod(DIFFOCI_PATH.stat().st_mode | stat.S_IEXEC) - - -def diffoci_diff(source, local_target): - """Diff the source image against the recently built target image using diffoci.""" - target = f"podman://{local_target}" - try: - return run( - str(DIFFOCI_PATH), - "diff", - source, - target, - "--semantic", - "--verbose", - ) - except subprocess.CalledProcessError as e: - error = e.stdout.decode() - raise RuntimeError( - f"Could not rebuild an identical image to {source}. Diffoci report:\n{error}" - ) - - -def build_image(tag, use_cache=False): +def build_image( + platform=None, + runtime=None, + cache=True, + date=None, +): """Build the Dangerzone container image with a special tag.""" + platform_args = [] if not platform else ["--platform", platform] + runtime_args = [] if not runtime else ["--runtime", runtime] + cache_args = [] if cache else ["--use-cache", "no"] + date_args = [] if not date else ["--debian-archive-date", date] run( "python3", "./install/common/build-image.py", - "--no-save", - "--use-cache", - str(use_cache), - "--tag", - tag, + *platform_args, + *runtime_args, + *cache_args, + *date_args, ) def parse_args(): - image_tag = git_determine_tag() - # TODO: Remove the local "podman://" prefix once we have started pushing images to a - # remote. 
- default_image_name = f"podman://{IMAGE_NAME}:{image_tag}" - parser = argparse.ArgumentParser( prog=sys.argv[0], description="Dev script for verifying container image reproducibility", ) parser.add_argument( - "--source", - default=default_image_name, - help=( - "The name of the image that you want to reproduce. If the image resides in" - " the local Docker / Podman engine, you can prefix it with podman:// or" - f" docker:// accordingly (default: {default_image_name})" - ), + "--platform", + default=None, + help=f"The platform for building the image (default: current platform)", + ) + parser.add_argument( + "--runtime", + choices=["docker", "podman"], + default=CONTAINER_RUNTIME, + help=f"The container runtime for building the image (default: {CONTAINER_RUNTIME})", ) parser.add_argument( - "--use-cache", + "--no-cache", default=False, action="store_true", - help="Whether to reuse the build cache (off by default for better reproducibility)", + help=( + "Do not use existing cached images for the container build." + " Build from the start with a new set of cached layers." 
+ ), + ) + parser.add_argument( + "--debian-archive-date", + default=None, + help="Use a specific Debian snapshot archive, by its date", + ) + parser.add_argument( + "digest", + help="The digest of the image that you want to reproduce", ) return parser.parse_args() @@ -148,32 +90,25 @@ def main(): ) args = parse_args() - logger.info(f"Ensuring that current Git commit matches image '{args.source}'") - commit = git_commit_get() - git_verify(commit, args.source) - - if not diffoci_is_installed(): - logger.info(f"Downloading diffoci helper from {DIFFOCI_URL}") - diffoci_download() - - tag = f"reproduce-{commit}" - target = f"{IMAGE_NAME}:{tag}" - logger.info(f"Building container image and tagging it as '{target}'") - build_image(tag, args.use_cache) + logger.info(f"Building container image") + build_image( + args.platform, + args.runtime, + not args.no_cache, + args.debian_archive_date, + ) logger.info( - f"Ensuring that source image '{args.source}' is semantically identical with" - f" built image '{target}'" + f"Check that the reproduced image has the expected digest: {args.digest}" + ) + run( + "./dev_scripts/repro-build", + "analyze", + "--show-contents", + "share/container.tar", + "--expected-image-digest", + args.digest, ) - try: - diffoci_diff(args.source, target) - except subprocess.CalledProcessError as e: - raise RuntimeError( - f"Could not reproduce image {args.source} for commit {commit}" - ) - breakpoint() - - logger.info(f"Successfully reproduced image '{args.source}' from commit '{commit}'") if __name__ == "__main__": diff --git a/dev_scripts/sign-assets.py b/dev_scripts/sign-assets.py index d59461d93..fecf002eb 100755 --- a/dev_scripts/sign-assets.py +++ b/dev_scripts/sign-assets.py @@ -11,8 +11,8 @@ DZ_ASSETS = [ - "container-{version}-i686.tar.gz", - "container-{version}-arm64.tar.gz", + "container-{version}-i686.tar", + "container-{version}-arm64.tar", "Dangerzone-{version}.msi", "Dangerzone-{version}-arm64.dmg", "Dangerzone-{version}-i686.dmg", diff 
--git a/docs/developer/reproducibility.md b/docs/developer/reproducibility.md index 6d370870f..934e5a617 100644 --- a/docs/developer/reproducibility.md +++ b/docs/developer/reproducibility.md @@ -47,21 +47,21 @@ trigger a CI error. For a simple way to reproduce a Dangerzone container image, you can checkout the commit this image was built from (you can find it from the image tag in its -`g<commit>` portion), and run the following command in a Linux environment: +`g<commit>` portion), retrieve the date it was built (also included in the image +tag), and run the following command in any environment: ``` -./dev_scripts/reproduce-image.py --source <image> +./dev_scripts/reproduce-image.py \ + --debian-archive-date <date> \ + <digest> ``` -This command will download the `diffoci` helper, build a container image from -the current Git commit, and ensure that the built image matches the source one, -with the exception of image names and file timestamps. - -> [!TIP] -> If the source image is not pushed to a registry, and is local instead, you -> can prefix it with `docker://` or `podman://` accordingly, so that `diffoci` -> can load it from the local Docker / Podman container engine. For example: -> -> ``` -> ./dev_scripts/reproduce.py --source podman://dangerzone.rocks/dangerzone:0.8.0-125-g725ce3b -> ``` +where: +* `<date>` should be given in YYYYMMDD format, e.g., 20250226 +* `<digest>` is the SHA-256 hash of the image for the **current platform**, with + or without the `sha256:` prefix. + +This command will build a container image from the current Git commit and the +provided date for the Debian archives. Then, it will compare the digest of the +manifest against the provided one. This is a simple way to ensure that the +created image is bit-for-bit reproducible.
diff --git a/dodo.py b/dodo.py index 2022ffae0..1b6472daf 100644 --- a/dodo.py +++ b/dodo.py @@ -57,7 +57,7 @@ def list_language_data(): *list_files("dangerzone/container_helpers"), "install/common/build-image.py", ] -IMAGE_TARGETS = ["share/container.tar.gz", "share/image-id.txt"] +IMAGE_TARGETS = ["share/container.tar", "share/image-id.txt"] SOURCE_DEPS = [ *list_files("assets"), @@ -188,8 +188,8 @@ def task_download_tessdata(): def task_build_image(): """Build the container image using ./install/common/build-image.py""" - img_src = "share/container.tar.gz" - img_dst = RELEASE_DIR / f"container-{VERSION}-{ARCH}.tar.gz" # FIXME: Add arch + img_src = "share/container.tar" + img_dst = RELEASE_DIR / f"container-{VERSION}-{ARCH}.tar" # FIXME: Add arch img_id_src = "share/image-id.txt" img_id_dst = RELEASE_DIR / "image-id.txt" # FIXME: Add arch diff --git a/install/common/build-image.py b/install/common/build-image.py index 91fe79cc2..f38029094 100644 --- a/install/common/build-image.py +++ b/install/common/build-image.py @@ -1,5 +1,4 @@ import argparse -import gzip import platform import secrets import subprocess @@ -13,8 +12,6 @@ elif platform.system() == "Linux": CONTAINER_RUNTIME = "podman" -ARCH = platform.machine() - def str2bool(v): if isinstance(v, bool): @@ -50,6 +47,16 @@ def determine_git_tag(): ) +def determine_debian_archive_date(): + """Get the date of the Debian archive from Dockerfile.env.""" + for env in Path("Dockerfile.env").read_text().split("\n"): + if env.startswith("DEBIAN_ARCHIVE_DATE"): + return env.split("=")[1] + raise Exception( + "Could not find 'DEBIAN_ARCHIVE_DATE' build argument in Dockerfile.env" + ) + + def main(): parser = argparse.ArgumentParser() parser.add_argument( @@ -59,16 +66,15 @@ def main(): help=f"The container runtime for building the image (default: {CONTAINER_RUNTIME})", ) parser.add_argument( - "--no-save", - action="store_true", - help="Do not save the container image as a tarball in share/container.tar.gz", + 
"--platform", + default=None, + help=f"The platform for building the image (default: current platform)", ) parser.add_argument( - "--compress-level", - type=int, - choices=range(0, 10), - default=9, - help="The Gzip compression level, from 0 (lowest) to 9 (highest, default)", + "--output", + "-o", + default=str(Path("share") / "container.tar"), + help="Path to store the container image", ) parser.add_argument( "--use-cache", @@ -83,63 +89,62 @@ def main(): default=None, help="Provide a custom tag for the image (for development only)", ) + parser.add_argument( + "--debian-archive-date", + "-d", + default=determine_debian_archive_date(), + help="Use a specific Debian snapshot archive, by its date (default %(default)s)", + ) + parser.add_argument( + "--dry", + default=False, + action="store_true", + help="Do not run any commands, just print what would happen", + ) args = parser.parse_args() - tarball_path = Path("share") / "container.tar.gz" - image_id_path = Path("share") / "image-id.txt" - - print(f"Building for architecture '{ARCH}'") - - tag = args.tag or determine_git_tag() - image_name_tagged = IMAGE_NAME + ":" + tag + tag = args.tag or f"{args.debian_archive_date}-{determine_git_tag()}" + image_name_tagged = f"{IMAGE_NAME}:{tag}" print(f"Will tag the container image as '{image_name_tagged}'") - with open(image_id_path, "w") as f: - f.write(tag) + image_id_path = Path("share") / "image-id.txt" + if not args.dry: + with open(image_id_path, "w") as f: + f.write(tag) # Build the container image, and tag it with the calculated tag print("Building container image") cache_args = [] if args.use_cache else ["--no-cache"] + platform_args = [] if not args.platform else ["--platform", args.platform] + rootless_args = [] if args.runtime == "docker" else ["--rootless"] + rootless_args = [] + dry_args = [] if not args.dry else ["--dry"] + subprocess.run( [ - args.runtime, + "./dev_scripts/repro-build", "build", - BUILD_CONTEXT, + "--runtime", + args.runtime, + "--build-arg", 
+ f"DEBIAN_ARCHIVE_DATE={args.debian_archive_date}", + "--datetime", + args.debian_archive_date, + *dry_args, *cache_args, - "-f", - "Dockerfile", + *platform_args, + *rootless_args, "--tag", image_name_tagged, + "--output", + args.output, + "-f", + "Dockerfile", + BUILD_CONTEXT, ], check=True, ) - if not args.no_save: - print("Saving container image") - cmd = subprocess.Popen( - [ - CONTAINER_RUNTIME, - "save", - image_name_tagged, - ], - stdout=subprocess.PIPE, - ) - - print("Compressing container image") - chunk_size = 4 << 20 - with gzip.open( - tarball_path, - "wb", - compresslevel=args.compress_level, - ) as gzip_f: - while True: - chunk = cmd.stdout.read(chunk_size) - if len(chunk) > 0: - gzip_f.write(chunk) - else: - break - cmd.wait(5) - if __name__ == "__main__": sys.exit(main()) diff --git a/install/linux/build-rpm.py b/install/linux/build-rpm.py index 1cec7d2c2..1eba1563e 100755 --- a/install/linux/build-rpm.py +++ b/install/linux/build-rpm.py @@ -66,14 +66,14 @@ def build(build_dir, qubes=False): print("* Creating a Python sdist") tessdata = root / "share" / "tessdata" tessdata_bak = root / "tessdata.bak" - container_tar_gz = root / "share" / "container.tar.gz" - container_tar_gz_bak = root / "container.tar.gz.bak" + container_tar = root / "share" / "container.tar" + container_tar_bak = root / "container.tar.bak" if tessdata.exists(): tessdata.rename(tessdata_bak) - stash_container = qubes and container_tar_gz.exists() - if stash_container and container_tar_gz.exists(): - container_tar_gz.rename(container_tar_gz_bak) + stash_container = qubes and container_tar.exists() + if stash_container and container_tar.exists(): + container_tar.rename(container_tar_bak) try: subprocess.run(["poetry", "build", "-f", "sdist"], cwd=root, check=True) # Copy and unlink the Dangerzone sdist, instead of just renaming it. 
If the @@ -84,8 +84,8 @@ def build(build_dir, qubes=False): finally: if tessdata_bak.exists(): tessdata_bak.rename(tessdata) - if stash_container and container_tar_gz_bak.exists(): - container_tar_gz_bak.rename(container_tar_gz) + if stash_container and container_tar_bak.exists(): + container_tar_bak.rename(container_tar) print("* Building RPM package") cmd = [ diff --git a/install/linux/dangerzone.spec b/install/linux/dangerzone.spec index 3076cfe98..6bb379ab2 100644 --- a/install/linux/dangerzone.spec +++ b/install/linux/dangerzone.spec @@ -18,7 +18,7 @@ # # * Qubes packages include some extra files under /etc/qubes-rpc, whereas # regular RPM packages include the container image under -# /usr/share/container.tar.gz +# /usr/share/container.tar # * Qubes packages have some extra dependencies. # 3. It is best to consume this SPEC file using the `install/linux/build-rpm.py` # script, which handles the necessary scaffolding for building the package. diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index a43e24b68..dd2565d8d 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -69,7 +69,7 @@ def test_install_raise_if_image_cant_be_installed( ) # Make podman load fail - mocker.patch("gzip.open", mocker.mock_open(read_data="")) + mocker.patch("builtins.open", mocker.mock_open(read_data="")) fp.register_subprocess( [container_utils.get_runtime(), "load"], @@ -83,6 +83,10 @@ def test_install_raises_if_still_not_installed( self, mocker: MockerFixture, provider: Container, fp: FakeProcess ) -> None: """When an image keep being not installed, it should return False""" + fp.register_subprocess( + ["podman", "version", "-f", "{{.Client.Version}}"], + stdout="4.0.0", + ) fp.register_subprocess( [container_utils.get_runtime(), "image", "ls"], @@ -101,8 +105,8 @@ def test_install_raises_if_still_not_installed( occurrences=2, ) - # Patch gzip.open and podman load so that it 
works - mocker.patch("gzip.open", mocker.mock_open(read_data="")) + # Patch open and podman load so that it works + mocker.patch("builtins.open", mocker.mock_open(read_data="")) fp.register_subprocess( [container_utils.get_runtime(), "load"], )