Skip to content

Commit

Permalink
Merge pull request #442 from Ensembl/lcampbell/docker_redo
Browse files Browse the repository at this point in the history
Reconfigure how containers are used in nextflow pipelines
  • Loading branch information
JAlvarezJarreta authored Oct 2, 2024
2 parents 71e0968 + 47fd389 commit 6d2ceea
Show file tree
Hide file tree
Showing 33 changed files with 313 additions and 169 deletions.
7 changes: 5 additions & 2 deletions cicd/gitlab/dot.gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ default:

# Using default stages (https://docs.gitlab.com/ee/ci/yaml/#stages): .pre > build > test > deploy > .post


# In-house pipelines

include:
# CI/CD debugging
- local: cicd/gitlab/parts/debug.gitlab-ci.yml
Expand All @@ -51,6 +49,11 @@ include:
- local: cicd/gitlab/parts/license.gitlab-ci.yml
# Flat file checks
- local: cicd/gitlab/parts/flatfile.gitlab-ci.yml
# Genomio docker deploy (on release tag creation)
- local: cicd/gitlab/parts/dockerbuild.genomio.gitlab-ci.yml
  rules:
    # Anchored at both ends so that only tags of the exact form vX.Y.Z match;
    # without the leading ^ a tag such as "dev1.2.3" would also trigger the deploy.
    - if: '$CI_COMMIT_TAG =~ /^v[0-9]+\.[0-9]+\.[0-9]+$/'
      when: always


# External pipelines
Expand Down
125 changes: 125 additions & 0 deletions cicd/gitlab/parts/dockerbuild.genomio.gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# See the NOTICE file distributed with this work for additional information
# regarding copyright ownership.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Docker image build-and-publish part of the Ensembl/ensembl-genomio CI/CD pipeline

# Build, test and deploy stage instances

# The build job below also serves as the base for the registry push jobs, through the "extends" mechanism
# (https://docs.gitlab.com/ee/ci/yaml/#extends)

### variable setup
variables:
  # Local image name, and last path component of the image in both registries
  CONTAINER_IMAGE_NAME: "ensembl-genomio"
  # Directory holding the Dockerfile; also used as the docker build context
  GENOMIO_DIR: "containers/docker/genomio"

  # Release version, taken from the git tag of the pipeline.
  # Last commit before merge to main should follow versioning tag: vX.X.X
  GENOMIO_RELEASE_VERSION: "${CI_COMMIT_TAG}"

  # GITLAB: image path below $CI_REGISTRY
  # (namespaces: https://docs.gitlab.com/ee/user/namespace/)
  GITLAB_PROJECT: "vectorbase"
  GITLAB_REPO: "ensembl-genomio"
  GITLAB_DOCKER_IMAGE: "${GITLAB_PROJECT}/${GITLAB_REPO}/${CONTAINER_IMAGE_NAME}"

  # DOCKERHUB: registry host and image path below it
  DOCKERHUB_PROJECT: "ensemblorg"
  DHUB_DOCKER_IMAGE: "${DOCKERHUB_PROJECT}/${CONTAINER_IMAGE_NAME}"
  DHUB_REG_URL: "registry.hub.docker.com"

### Phase 1: Docker image build
# Builds the genomio image from $GENOMIO_DIR/Dockerfile and tags it locally as
# $CONTAINER_IMAGE_NAME:latest. The registry push jobs inherit this job's image,
# services and tags through `extends`.
genomio_docker_build:
  # CI_DEBUG_TRACE is intentionally NOT enabled here: debug logging prints every
  # variable available to the job, and the jobs extending this one handle
  # registry credentials. Enable it manually for a one-off debugging run only.
  image: docker:latest
  stage: build
  services:
    - docker:dind

  tags:
    - dind

  script:
    # https://docs.gitlab.com/ee/ci/variables/predefined_variables.html
    - echo "Building $CONTAINER_IMAGE_NAME with Docker:"
    - echo "docker build -f ${GENOMIO_DIR}/Dockerfile --build-arg genomio_release_version=$GENOMIO_RELEASE_VERSION -t $CONTAINER_IMAGE_NAME:latest $GENOMIO_DIR"
    # Arguments are quoted so that an empty or unusual value cannot shift the
    # remaining arguments of the command
    - docker build -f "${GENOMIO_DIR}/Dockerfile" --build-arg "genomio_release_version=$GENOMIO_RELEASE_VERSION" -t "$CONTAINER_IMAGE_NAME:latest" "$GENOMIO_DIR"

  # Passes the build directory on to the jobs that `needs` this one
  artifacts:
    paths:
      - $GENOMIO_DIR

### Phase 2: Tag and push Ensembl Genomio image to GitLab
# Logs into the GitLab container registry, then tags and pushes the locally
# built image both as $GENOMIO_RELEASE_VERSION and as `latest`.
# NOTE(review): the tag/push commands assume the image built by
# genomio_docker_build is still present in the Docker daemon this job talks to;
# only $GENOMIO_DIR is passed on as an artifact - confirm on the `dind` runners.
genomio_gitlab_register:
  extends: genomio_docker_build
  stage: test
  variables:
    # This job handles registry credentials: keep debug logging off, as it would
    # print every variable (secrets included) in the job log
    CI_DEBUG_TRACE: "false"
  # `needs` starts this job as soon as the build job has finished and fetches
  # its artifacts (https://docs.gitlab.com/ee/ci/yaml/#needs)
  needs:
    - job: genomio_docker_build
      artifacts: true
  artifacts:
    paths:
      - $GENOMIO_DIR

  script:
    # Never echo the password itself: anything echoed here ends up in the job log
    - echo "Logging into Gitlab container registry $CI_REGISTRY as $CI_REGISTRY_USER"
    # Quoted so the password reaches docker login without word splitting or globbing
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"

    - echo "Tagging GitLab genomio image"
    - echo "docker tag $CONTAINER_IMAGE_NAME:latest $CI_REGISTRY/$GITLAB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"
    - docker tag "$CONTAINER_IMAGE_NAME:latest" "$CI_REGISTRY/$GITLAB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"
    - docker tag "$CONTAINER_IMAGE_NAME:latest" "$CI_REGISTRY/$GITLAB_DOCKER_IMAGE:latest"

    - echo "Pushing versioned genomio image to Gitlab registry:"
    - echo "CMD=docker push $CI_REGISTRY/$GITLAB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"
    - docker push "$CI_REGISTRY/$GITLAB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"

    - echo "Pushing default 'latest' image to gitlab registry:"
    - echo "CMD=docker push $CI_REGISTRY/$GITLAB_DOCKER_IMAGE:latest"
    - docker push "$CI_REGISTRY/$GITLAB_DOCKER_IMAGE:latest"

### Phase 3: Tag and push Ensembl Genomio image to Dockerhub
# Logs into Dockerhub, then tags and pushes the locally built image both as
# $GENOMIO_RELEASE_VERSION and as `latest`. Runs only after the GitLab registry
# push job has finished.
# NOTE(review): the tag/push commands assume the image built by
# genomio_docker_build is still present in the Docker daemon this job talks to;
# only $GENOMIO_DIR is passed on as an artifact - confirm on the `dind` runners.
genomio_dockerhub_register:
  extends: genomio_docker_build
  stage: deploy
  variables:
    # This job handles registry credentials: keep debug logging off, as it would
    # print every variable (secrets included) in the job log
    CI_DEBUG_TRACE: "false"
  # `needs` orders this job after the listed jobs and fetches the build job's
  # artifacts (https://docs.gitlab.com/ee/ci/yaml/#needs)
  needs:
    - job: genomio_gitlab_register
    - job: genomio_docker_build
      artifacts: true
  artifacts:
    paths:
      - $GENOMIO_DIR

  script:
    - echo "Logging into Dockerhub registry:"
    # Quoted so the token reaches docker login without word splitting or globbing
    - echo "$ENSEMBL_DOCKERHUB_AUTH" | docker login -u "$ENS_DOCKERHUB_PALADIN" --password-stdin "$DHUB_REG_URL"

    - echo "Tagging Docker genomio image"
    - echo "CMD=docker tag $CONTAINER_IMAGE_NAME:latest $DHUB_REG_URL/$DHUB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"
    - docker tag "$CONTAINER_IMAGE_NAME:latest" "$DHUB_REG_URL/$DHUB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"
    - docker tag "$CONTAINER_IMAGE_NAME:latest" "$DHUB_REG_URL/$DHUB_DOCKER_IMAGE:latest"

    - echo "Pushing versioned genomio image to registry:"
    - echo "CMD=docker push $DHUB_REG_URL/$DHUB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"
    - docker push "$DHUB_REG_URL/$DHUB_DOCKER_IMAGE:$GENOMIO_RELEASE_VERSION"

    - echo "Pushing default 'latest' image to genomio registry:"
    - echo "CMD=docker push $DHUB_REG_URL/$DHUB_DOCKER_IMAGE:latest"
    - docker push "$DHUB_REG_URL/$DHUB_DOCKER_IMAGE:latest"


33 changes: 33 additions & 0 deletions containers/docker/genomio/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
FROM python:3.10-slim

# Version string recorded in the image labels below.
# NOTE(review): the pip install further down does not pin ensembl-genomio to this
# version, so the label and the installed package can disagree - confirm intent.
ARG genomio_release_version=latest

# System packages needed to build and link the mysqlclient Python package
RUN apt-get update \
    && apt-get -y upgrade \
    && apt-get -y install \
        default-libmysqlclient-dev \
        pkg-config \
        gcc \
        pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir ensembl-genomio mysqlclient

# Testing dependencies at build time: the build fails if the package cannot be
# imported or the entry point is missing. `set -e` is required because a heredoc
# script otherwise only reports the status of its last command.
RUN <<EOF
set -e
# Test package import:
python -c 'import ensembl.io.genomio'
# Test entry point module:
assembly_download --help
EOF

# Default command: import check. In exec form the argument must not carry its
# own quotes, otherwise Python evaluates a string literal and imports nothing.
CMD ["python", "-c", "import ensembl.io.genomio"]

LABEL base.image="ensembl-genomio:${genomio_release_version}"
LABEL version="$genomio_release_version"
LABEL software="Ensembl-genomio pipelines"
LABEL software.version="$genomio_release_version"
LABEL about.summary="A lightweight container to run ensembl-genomio module scripts."
LABEL about.home="https://github.com/Ensembl/ensembl-genomio"
LABEL about.documentation="https://ensembl.github.io/ensembl-genomio"
LABEL license="https://www.apache.org/licenses/LICENSE-2.0.txt"
LABEL maintainer="Ensembl"
LABEL maintainer.email="[email protected]"
46 changes: 0 additions & 46 deletions containers/ncbi_datasets_lite.recipe.def

This file was deleted.

65 changes: 0 additions & 65 deletions containers/ncbi_datasets_v16.10.0.def

This file was deleted.

3 changes: 1 addition & 2 deletions pipelines/nextflow/modules/annotation/dump_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

process DUMP_ANNOTATION {
tag "${db.species}"
label "variable_2_8_32"
label "ensembl_scripts_container"
label 'variable_2_8_32'
maxForks params.max_database_forks

input:
Expand Down
2 changes: 1 addition & 1 deletion pipelines/nextflow/modules/database/dump_db.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

process DUMP_DB {
tag "$db.species"
label "variable_2_8_32"
label 'variable_2_8_32'
publishDir "$out_dir/$release_dir/coredb/$db.division", mode: 'copy'
maxForks params.max_database_forks

Expand Down
4 changes: 2 additions & 2 deletions pipelines/nextflow/modules/download/datasets_genome_meta.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ process DOWNLOAD_NCBI_STATS {
tag "$meta.id"
label 'local'
label 'cached'
label 'datasets_container'
container 'ensemblorg/datasets-cli:latest'

input:
val(meta) // with keys [ id, accession ]
Expand All @@ -40,7 +40,7 @@ process DOWNLOAD_NCBI_STATS {
exit 1
fi
# Check if it should maybe be using RefSeq?
# Check if it should maybe be using RefSeq?
if [[ $(jq '.total_count' !{output}) -eq 0 ]] && [[ !{meta.accession} =~ "GCA_" ]]; then
accession=$(echo !{meta.accession} | sed 's/^GCA_/GCF_/')
echo "Trying again with RefSeq accession: $accession"
Expand Down
2 changes: 1 addition & 1 deletion pipelines/nextflow/modules/events/dump_events.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

process DUMP_EVENTS {
tag "${db.species}"
label "variable_2_8_32"
label 'variable_2_8_32'
maxForks params.max_database_forks

input:
Expand Down
3 changes: 1 addition & 2 deletions pipelines/nextflow/modules/fasta/dump_fasta_dna.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

process DUMP_FASTA_DNA {
tag "${db.species}"
label "variable_2_8_32"
label "ensembl_scripts_container"
label 'variable_2_8_32'
maxForks params.max_database_forks

input:
Expand Down
3 changes: 1 addition & 2 deletions pipelines/nextflow/modules/fasta/dump_fasta_peptides.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

process DUMP_FASTA_PEPTIDES {
tag "${db.species}"
label "variable_2_8_32"
label "ensembl_scripts_container"
label 'variable_2_8_32'
maxForks params.max_database_forks

input:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

process DUMP_GENOME_META {
tag "${db.species}"
label "normal"
label 'normal'
maxForks params.max_database_forks

input:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

process COMPARE_GENOME_STATS {
tag "${db.species}"
label "local"
label 'local'

input:
tuple val(db), path(ncbi_stats, stageAs: "ncbi_stats.json"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

process DUMP_GENOME_STATS {
tag "${db.species}"
label "normal"
label 'normal'
maxForks params.max_database_forks

input:
Expand Down
3 changes: 1 addition & 2 deletions pipelines/nextflow/modules/gff3/dump_gff3.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

process DUMP_GFF3 {
tag "${db.species}"
label "variable_2_8_32"
label "ensembl_scripts_container"
label 'variable_2_8_32'
maxForks params.max_database_forks

input:
Expand Down
Loading

0 comments on commit 6d2ceea

Please sign in to comment.