Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new cleanup job for GKE, AKS, and EKS clusters #6935

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions ci/jenkins/jobs/macros.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@
#!/bin/bash
set -ex
sudo ./ci/test-conformance-aks.sh --cluster-name "${CLUSTERNAME}" --cleanup-only

# Builder macro that runs the `--cleanup-all` mode of the AKS, GKE and EKS
# conformance scripts, one after another, as root (sudo).
# The embedded script runs with `set -ex`: every command is traced, and the
# first script that exits non-zero aborts the builder, so the cleanups listed
# after it are not attempted in that run.
# Only the GKE script is handed an extra option (--gcloud-sdk-path); the AKS
# and EKS scripts are invoked with --cleanup-all alone.
- builder:
name: builder-cluster-cleanup
builders:
- shell: |-
#!/bin/bash
set -ex
sudo ./ci/test-conformance-aks.sh --cleanup-all
sudo ./ci/test-conformance-gke.sh --gcloud-sdk-path "${GCLOUD_SDK_PATH}" --cleanup-all
sudo ./ci/test-conformance-eks.sh --cleanup-all

- builder:
name: builder-workload-cluster-garbage-collection
Expand Down
14 changes: 14 additions & 0 deletions ci/jenkins/jobs/projects-cloud.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,20 @@
- text:
credential-id: WORKFORCE_POOL # Jenkins secret that stores the cloud resource pool id
variable: WORKFORCE_POOL
# Periodic job that runs the `builder-cluster-cleanup` builder on the
# `antrea-cloud` node. Builds never run concurrently (concurrent: false).
# The timed trigger `H 22 * * *` uses Jenkins cron syntax: once a day during
# hour 22, with the minute chosen by Jenkins from a hash of the job name (`H`).
# NOTE(review): the hour is interpreted in the Jenkins controller's time zone
# unless a TZ is specified - confirm this matches the intended window.
- 'cloud-{name}-cleanup-period':
description: This is for deleting remaining clusters on all cloud providers.
builders:
- builder-cluster-cleanup
concurrent: false
disabled: false
node: antrea-cloud
branches:
- '${{ANTREA_GIT_REVISION}}'
repo_url: '${{ANTREA_REPO}}'
publishers:
triggers:
- timed: H 22 * * *
wrappers: []
- 'cloud-{name}-{test_name}-cleanup':
test_name: gke
description: This is for deleting GKE test clusters.
Expand Down
36 changes: 34 additions & 2 deletions ci/test-conformance-aks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ KUBE_CONFORMANCE_IMAGE_VERSION=auto

_usage="Usage: $0 [--cluster-name <AKSClusterNameToUse>] [--kubeconfig <KubeconfigSavePath>] [--k8s-version <ClusterVersion>]\
[--azure-app-id <AppID>] [--azure-tenant-id <TenantID>] [--azure-password <Password>] \
[--aks-region <Region>] [--log-mode <SonobuoyResultLogLevel>] [--setup-only] [--cleanup-only]
[--aks-region <Region>] [--log-mode <SonobuoyResultLogLevel>] [--setup-only] [--cleanup-only] [--cleanup-all]

Setup a AKS cluster to run K8s e2e community tests (Conformance & Network Policy).

Expand All @@ -47,7 +47,8 @@ Setup a AKS cluster to run K8s e2e community tests (Conformance & Network Policy
--aks-region The Azure region where the cluster will be initiated. Defaults to westus.
--log-mode Use the flag to set either 'report', 'detail', or 'dump' level data for sonobuoy results.
--setup-only Only perform setting up the cluster and run test.
--cleanup-only Only perform cleaning up the cluster."
--cleanup-only Only perform cleaning up the cluster.
--cleanup-all Cleaning up all clusters without protected tag."

function print_usage {
echoerr "$_usage"
Expand Down Expand Up @@ -104,6 +105,10 @@ case $key in
RUN_ALL=false
shift
;;
--cleanup-all)
RUN_CLEANUP_ALL=true
shift
;;
-h|--help)
print_usage
exit 0
Expand Down Expand Up @@ -300,6 +305,29 @@ function cleanup_cluster() {
echo "=== Cleanup cluster ${CLUSTER} succeeded ==="
}

function cleanup_all_clusters() {
echo '=== Cleaning up all AKS clusters without tag protected ==='
clusters=$(az aks list \
--query "[!(tags.protected && tags.protected=='true') && resourceGroup=='${RESOURCE_GROUP}'].{name:name,rg:resourceGroup}" \
-o tsv)
if [[ -z "$clusters" ]]; then
echo "Unprotected cluster not found."
exit
fi
while read -r clusterName resourceGroup; do
[[ -z "$clusterName" ]] && continue
echo "Deleting Cluster: $clusterName in $resourceGroup"
az aks delete --name "$clusterName" --resource-group "$resourceGroup" --yes
done <<< "$clusters"
resource=$(az aks list \
--query "[resourceGroup=='${RESOURCE_GROUP}'].{name:name,rg:resourceGroup}" \
-o tsv)
if [[ -z "$resource" ]]; then
az group delete --name ${RESOURCE_GROUP} --yes --no-wait
fi
echo "=== Cleanup AKS clusters succeeded ==="
}

# ensures that the script can be run from anywhere
THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
GIT_CHECKOUT_DIR=${THIS_DIR}/..
Expand All @@ -317,6 +345,10 @@ if [[ "$RUN_ALL" == true || "$RUN_CLEANUP_ONLY" == true ]]; then
cleanup_cluster
fi

if [[ "$RUN_CLEANUP_ALL" == true ]]; then
cleanup_all_clusters
fi

if [[ "$RUN_CLEANUP_ONLY" == false && $TEST_SCRIPT_RC -ne 0 ]]; then
exit 1
fi
37 changes: 36 additions & 1 deletion ci/test-conformance-eks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ AWS_SERVICE_USER_NAME=""
_usage="Usage: $0 [--cluster-name <EKSClusterNameToUse>] [--kubeconfig <KubeconfigSavePath>] [--k8s-version <ClusterVersion>]\
[--aws-access-key <AccessKey>] [--aws-secret-key <SecretKey>] [--aws-region <Region>] [--aws-service-user <ServiceUserName>]\
[--aws-service-user-role-arn <ServiceUserRoleARN>] [--ssh-key <SSHKey] [--ssh-private-key <SSHPrivateKey] [--log-mode <SonobuoyResultLogLevel>]\
[--setup-only] [--cleanup-only]
[--setup-only] [--cleanup-only] [--cleanup-all]

Setup a EKS cluster to run K8s e2e community tests (Conformance & Network Policy).

Expand All @@ -56,6 +56,7 @@ Setup a EKS cluster to run K8s e2e community tests (Conformance & Network Policy
--log-mode Use the flag to set either 'report', 'detail', or 'dump' level data for sonobuoy results.
--setup-only Only perform setting up the cluster and run test.
--cleanup-only Only perform cleaning up the cluster.
--cleanup-all Cleaning up all clusters without protected tag.
--skip-eksctl-install Do not install the latest eksctl version. Eksctl must be installed already."

function print_usage {
Expand Down Expand Up @@ -125,6 +126,10 @@ case $key in
RUN_ALL=false
shift
;;
--cleanup-all)
RUN_CLEANUP_ALL=true
shift
;;
--skip-eksctl-install)
INSTALL_EKSCTL=false
shift
Expand Down Expand Up @@ -354,6 +359,32 @@ function cleanup_cluster() {
echo "=== Cleanup cluster ${CLUSTER} succeeded ==="
}

# Deletes every EKS cluster in ${REGION} that does not carry a "protected" tag
# (any value of the tag protects the cluster).
# Globals:
#   REGION (read) - AWS region used for listing, inspecting and deleting.
# Returns:
#   0 when all unprotected clusters were handed to "eksctl delete cluster"
#   successfully (or there was nothing to do), 1 when listing failed or any
#   deletion failed. Clusters whose ARN or tags cannot be read are skipped
#   with a warning and do not count as failures.
function cleanup_all_clusters() {
    echo '=== Cleaning up all EKS clusters without tag protected ==='
    local cluster_list clusters cluster cluster_arn tags has_tag
    local failed=false

    # List in the same region that the delete below targets, and keep the
    # eksctl call out of a pipeline so that its failure is not masked by jq.
    if ! cluster_list=$(eksctl get cluster --region "${REGION}" --output json); then
        echo "Failed to list EKS clusters." >&2
        return 1
    fi
    # NOTE(review): the original filter reads .metadata.name; .Name is accepted
    # too on the assumption that some eksctl releases emit that key instead -
    # confirm against the installed eksctl. Entries with neither are dropped.
    if ! clusters=$(jq -r '.[] | .metadata.name // .Name // empty' <<< "$cluster_list"); then
        echo "Failed to parse the EKS cluster list." >&2
        return 1
    fi
    if [[ -z "$clusters" ]]; then
        echo "Unprotected cluster not found."
        # Return instead of exit so the caller decides the script's exit code.
        return 0
    fi

    # The list is fed through fd 3 so that aws/eksctl cannot consume it from
    # stdin.
    while IFS= read -r cluster <&3; do
        [[ -z "$cluster" ]] && continue
        cluster_arn=$(aws eks describe-cluster --name "$cluster" --region "${REGION}" \
            --query "cluster.arn" --output text 2>/dev/null)
        if [[ -z "$cluster_arn" || "$cluster_arn" == "None" ]]; then
            echo "Warning: Unable to retrieve ARN for cluster '$cluster'. Skipping this cluster." >&2
            continue
        fi
        if ! tags=$(aws eks list-tags-for-resource --resource-arn "$cluster_arn" --region "${REGION}" \
            --query "tags" --output json 2>/dev/null); then
            echo "Warning: Unable to retrieve tags for cluster '$cluster'. Skipping this cluster." >&2
            continue
        fi
        # "(. // {})" makes a null tag set behave like an empty one. Anything
        # other than an explicit "false" (tag present, or jq failed) keeps the
        # cluster, so an unreadable answer never leads to a deletion.
        has_tag=$(jq -r --arg key "protected" '(. // {}) | has($key)' <<< "$tags")
        if [[ "$has_tag" != "false" ]]; then
            continue
        fi
        echo "Deleting cluster ${cluster} in region ${REGION}"
        if ! eksctl delete cluster --name "${cluster}" --region "${REGION}"; then
            echo "Failed to delete cluster ${cluster}." >&2
            failed=true
        fi
    done 3<<< "$clusters"

    if [[ "$failed" == true ]]; then
        echo "=== Cleanup EKS clusters failed ===" >&2
        return 1
    fi
    echo "=== Cleanup EKS clusters succeeded ==="
}

# ensures that the script can be run from anywhere
THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
GIT_CHECKOUT_DIR=${THIS_DIR}/..
Expand All @@ -371,6 +402,10 @@ if [[ "$RUN_ALL" == true || "$RUN_CLEANUP_ONLY" == true ]]; then
cleanup_cluster
fi

if [[ "$RUN_CLEANUP_ALL" == true ]]; then
cleanup_all_clusters
fi

if [[ "$RUN_CLEANUP_ONLY" == false && $TEST_SCRIPT_RC -ne 0 ]]; then
exit 1
fi
31 changes: 31 additions & 0 deletions ci/test-conformance-gke.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ case $key in
RUN_ALL=false
shift
;;
--cleanup-all)
RUN_CLEANUP_ALL=true
shift
;;
-h|--help)
print_usage
exit 0
Expand Down Expand Up @@ -329,6 +333,29 @@ function cleanup_cluster() {
echo "=== Cleanup cluster ${CLUSTER} succeeded ==="
}

# Deletes every GKE cluster in ${GKE_ZONE} whose "protected" resource label is
# not "true". Each deletion is attempted up to 5 times, 10 seconds apart.
# Globals:
#   GKE_ZONE (read) - zone whose clusters are listed and deleted.
# Returns:
#   0 when every unprotected cluster was deleted (or there was none),
#   1 when listing failed or a cluster could not be deleted after all retries.
# NOTE(review): this calls plain "gcloud" from PATH while the Jenkins job passes
# --gcloud-sdk-path to the script - confirm that the SDK's gcloud is the one
# resolved here.
function cleanup_all_clusters() {
    echo '=== Cleaning up all unprotected GKE clusters ==='
    local cluster_json targets cluster_name zone retry
    local failed=false

    # Listing and filtering are done up front (not in a pipeline) so that a
    # gcloud/jq failure is detected and the delete loop runs in this shell,
    # where failures can be recorded.
    if ! cluster_json=$(gcloud container clusters list --zone "${GKE_ZONE}" \
        --format="json(name, location, resourceLabels)"); then
        echo "=== Failed to list GKE clusters! ===" >&2
        return 1
    fi
    if ! targets=$(jq -r '.[] | select(.resourceLabels.protected != "true") | "\(.name) \(.location)"' \
        <<< "${cluster_json}"); then
        echo "=== Failed to parse the GKE cluster list! ===" >&2
        return 1
    fi

    # The list is fed through fd 3 so that gcloud cannot consume it from stdin.
    while read -r cluster_name zone <&3; do
        [[ -z "${cluster_name}" ]] && continue
        # The listing is restricted to GKE_ZONE, so the reported location is
        # expected to match it; fall back to GKE_ZONE if it is missing.
        zone=${zone:-${GKE_ZONE}}
        echo "Deleting cluster ${cluster_name} in zone '${zone}'..."
        retry=5
        while [[ "${retry}" -gt 0 ]]; do
            # --quiet suppresses the interactive confirmation prompt, which
            # cannot be answered in an unattended job.
            if gcloud container clusters delete "${cluster_name}" --zone "${zone}" --quiet; then
                break
            fi
            retry=$((retry-1))
            if [[ "${retry}" -gt 0 ]]; then
                sleep 10
            fi
        done
        if [[ "${retry}" -eq 0 ]]; then
            echo "=== Failed to delete GKE cluster ${cluster_name}! ==="
            failed=true
        fi
    done 3<<< "${targets}"

    if [[ "${failed}" == true ]]; then
        echo "=== Cleanup GKE clusters failed ===" >&2
        return 1
    fi
    echo "=== Cleanup GKE clusters succeeded ==="
}

if [[ "$RUN_ALL" == true || "$RUN_SETUP_ONLY" == true ]]; then
setup_gke
deliver_antrea_to_gke
Expand All @@ -339,6 +366,10 @@ if [[ "$RUN_ALL" == true || "$RUN_CLEANUP_ONLY" == true ]]; then
cleanup_cluster
fi

if [[ "$RUN_CLEANUP_ALL" == true ]]; then
cleanup_all_clusters
fi

if [[ "$RUN_CLEANUP_ONLY" == false && $TEST_SCRIPT_RC -ne 0 ]]; then
exit 1
fi
Loading