From b46b99bcff7bab6b591a0f3cd58fc9c02db1dbb2 Mon Sep 17 00:00:00 2001 From: Daniel Reuter Date: Fri, 24 Jan 2025 08:49:33 +0100 Subject: [PATCH] feat: advice `single_zone`, remove old advice --- .../single_aws_zone/action_needed_summary.md | 1 - .../experiment_zone_outage.json.ftl | 382 ------------------ extadvice/single_aws_zone/implemented.md | 1 - extadvice/single_aws_zone/instructions.md | 30 -- extadvice/single_aws_zone/motivation.md | 2 - extadvice/single_aws_zone/single_aws_zone.go | 57 --- .../single_aws_zone/validation_needed.md | 2 - .../action_needed_summary.md | 1 - .../experiment_zone_outage.json.ftl | 382 ------------------ extadvice/single_azure_zone/implemented.md | 2 - extadvice/single_azure_zone/instructions.md | 30 -- extadvice/single_azure_zone/motivation.md | 2 - .../single_azure_zone/single_azure_zone.go | 57 --- .../single_azure_zone/validation_needed.md | 2 - .../single_gcp_zone/action_needed_summary.md | 1 - .../experiment_zone_outage.json.ftl | 382 ------------------ extadvice/single_gcp_zone/implemented.md | 1 - extadvice/single_gcp_zone/instructions.md | 30 -- extadvice/single_gcp_zone/motivation.md | 2 - extadvice/single_gcp_zone/single_gcp_zone.go | 57 --- .../single_gcp_zone/validation_needed.md | 2 - main.go | 24 -- 22 files changed, 1450 deletions(-) delete mode 100644 extadvice/single_aws_zone/action_needed_summary.md delete mode 100644 extadvice/single_aws_zone/experiment_zone_outage.json.ftl delete mode 100644 extadvice/single_aws_zone/implemented.md delete mode 100644 extadvice/single_aws_zone/instructions.md delete mode 100644 extadvice/single_aws_zone/motivation.md delete mode 100644 extadvice/single_aws_zone/single_aws_zone.go delete mode 100644 extadvice/single_aws_zone/validation_needed.md delete mode 100644 extadvice/single_azure_zone/action_needed_summary.md delete mode 100644 extadvice/single_azure_zone/experiment_zone_outage.json.ftl delete mode 100644 extadvice/single_azure_zone/implemented.md delete mode 100644 extadvice/single_azure_zone/instructions.md delete mode 100644 extadvice/single_azure_zone/motivation.md delete mode 100644 extadvice/single_azure_zone/single_azure_zone.go delete mode 100644 extadvice/single_azure_zone/validation_needed.md delete mode 100644 extadvice/single_gcp_zone/action_needed_summary.md delete mode 100644 extadvice/single_gcp_zone/experiment_zone_outage.json.ftl delete mode 100644 extadvice/single_gcp_zone/implemented.md delete mode 100644 extadvice/single_gcp_zone/instructions.md delete mode 100644 extadvice/single_gcp_zone/motivation.md delete mode 100644 extadvice/single_gcp_zone/single_gcp_zone.go delete mode 100644 extadvice/single_gcp_zone/validation_needed.md diff --git a/extadvice/single_aws_zone/action_needed_summary.md b/extadvice/single_aws_zone/action_needed_summary.md deleted file mode 100644 index d960ffd..0000000 --- a/extadvice/single_aws_zone/action_needed_summary.md +++ /dev/null @@ -1 +0,0 @@ -Right now, when availability zone *${target.attr('aws.zone',0)}* is unavailable, all of *${target.attr('steadybit.label')}*'s Kubernetes pods are unavailable as well. diff --git a/extadvice/single_aws_zone/experiment_zone_outage.json.ftl b/extadvice/single_aws_zone/experiment_zone_outage.json.ftl deleted file mode 100644 index f3b20a2..0000000 --- a/extadvice/single_aws_zone/experiment_zone_outage.json.ftl +++ /dev/null @@ -1,382 +0,0 @@ -{ - "templateTitle": "AWS Zone outage", - "templateDescription": "Check what happens when an AWS Availability Zone is down and validate that Kubernetes manages this accordingly by routing the traffic within expected failure rates so that the offered features still work. As soon as the zone is available again, the pod should be ready again within 60s.", - "placeholders": [ - <#if target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='http'> - { - "key": "httpLoadBalancedEndpoint", - "name": "HTTP Load Balanced Endpoint", - "description": "### What is the URL of a **load-balanced HTTP endpoint** served by the Kubernetes workload?\n\nWe will use the HTTP endpoint to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='k6'> - { - "key": "k6LoadTestFile", - "name": "k6 Load Test File", - "description": "### Specify a k6 load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='jmeter'> - { - "key": "jmeterLoadTestFile", - "name": "JMeter Load Test File", - "description": "### Specify a JMeter load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='gatling'> - { - "key": "gatlingLoadTestFile", - "name": "Gatling Load Test File", - "description": "### Specify a Gatling load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - - ], - "tags": ["Redundancy", "AWS", "Availability Zone", "Advice"], - "experimentName": "AWS Zone Outage of ${target.attr('aws.zone', 0)} for ${target.attr('steadybit.label')}", - "hypothesis": "When AWS Availability Zone ${target.attr('aws.zone', 0)} is down for ${target.attr('steadybit.label')}, Kubernetes manages this accordingly by routing the traffic within expected failure rates so that the offered features still work. As soon as the zone is available again, the pod is ready within 60s.", - "lanes": [ - { - "steps": [ - <#if target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='http'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s", - "headers": [], - "method": "GET", - "successRate": 100, - "maxConcurrent": 5, - "followRedirects": false, - "readTimeout": "5s", - "connectTimeout": "5s", - "requestsPerSecond": 10, - "url": "[[httpLoadBalancedEndpoint]]", - "statusCode": "200-299" - }, - "customLabel": "INVARIANT: ${target.attr('steadybit.label')}'s features work within expected success rates", - "actionType": "com.steadybit.extension_http.check.periodically", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='k6'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "environment": [], - "file": "[[k6LoadTestFile]]" - }, - "actionType": "com.steadybit.extension_k6.run", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='jmeter'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "file": "[[jmeterLoadTestFile]]", - "parameter": [] - }, - "actionType": "com.steadybit.extension_jmeter.run", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='gatling'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "file": "[[gatlingLoadTestFile]]", - "parameter": [] - }, - "actionType": "com.steadybit.extension_gatling.run", - "radius": {} - } - <#else> - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "TODO VALIDATION: INVARIANT: ${target.attr('steadybit.label')}'s features work within expected success rates" - } - - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "20s", - "podCountCheckMode": "podCountEqualsDesiredCount" - }, - "customLabel": "GIVEN: All pods in ${target.attr('aws.zone', 0)} are ready", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "aws.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('aws.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "20s" - }, - "customLabel": "Wait for AWS Zone outage" - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "ip": [], - "port": [], - "duration": "60s", - "hostname": [], - "failOnHostNetwork": true - }, - "customLabel": "WHEN: Zone outage of ${target.attr('aws.zone', 0)} for ${target.attr('steadybit.label')}", - "actionType": "com.steadybit.extension_container.network_blackhole", - "radius": { - "targetType": "com.steadybit.extension_container.container", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "aws.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('aws.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null, - "percentage": 50 - } - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "60s", - "podCountCheckMode": "podCountEqualsDesiredCount" - }, - "customLabel": "THEN: After Zone outage, all pods become ready again within 60s", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "aws.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('aws.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "20s" - }, - "customLabel": "Wait for AWS Zone outage" - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "45s", - "podCountCheckMode": "podCountLessThanDesiredCount" - }, - "customLabel": "THEN: Pods are detected as down", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "aws.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('aws.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "Show Kubernetes events from the cluster", - "actionType": "com.steadybit.extension_kubernetes.kubernetes_logs", - "radius": { - "targetType": "com.steadybit.extension_kubernetes.kubernetes-cluster", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "Show Pod Count Metrics for the cluster", - "actionType": "com.steadybit.extension_kubernetes.pod_count_metric", - "radius": { - "targetType": "com.steadybit.extension_kubernetes.kubernetes-cluster", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - } - ] - }, - "query": null - } - } - ] - } - ] -} diff --git a/extadvice/single_aws_zone/implemented.md b/extadvice/single_aws_zone/implemented.md deleted file mode 100644 index ab0376b..0000000 --- a/extadvice/single_aws_zone/implemented.md +++ /dev/null @@ -1 +0,0 @@ -Right now, when availability zone *${target.attr('aws.zone',0)}* fails, your service *${target.attr('steadybit.label')}* will still be available because you use *${target.attrs('aws.zone')?size}* zones to handle requests. diff --git a/extadvice/single_aws_zone/instructions.md b/extadvice/single_aws_zone/instructions.md deleted file mode 100644 index 4e05b33..0000000 --- a/extadvice/single_aws_zone/instructions.md +++ /dev/null @@ -1,30 +0,0 @@ -Schedule Kubernetes nodes in different availability zones and configure a `podAntiAffinity` to spread your pods across different zones. - -```yaml -apiVersion: apps/v1 -kind: Deployment -spec: - selector: - matchLabels: - app: example - template: - metadata: - labels: - app: example - spec: -% startHighlight % - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - example - topologyKey: "topology.kubernetes.io/zone" -% endHighlight % - containers: - - name: example - image: images.my-company.example/app:v4 -``` diff --git a/extadvice/single_aws_zone/motivation.md b/extadvice/single_aws_zone/motivation.md deleted file mode 100644 index d25b78c..0000000 --- a/extadvice/single_aws_zone/motivation.md +++ /dev/null @@ -1,2 +0,0 @@ -An availability zone can be unavailable as they are not redundantly designed. -In order to survive an outage of the availability zone *${target.attr('aws.zone',0)}* you should spread your Kubernetes pods across multiple availability zones. diff --git a/extadvice/single_aws_zone/single_aws_zone.go b/extadvice/single_aws_zone/single_aws_zone.go deleted file mode 100644 index ba67774..0000000 --- a/extadvice/single_aws_zone/single_aws_zone.go +++ /dev/null @@ -1,57 +0,0 @@ -package single_aws_zone - -import ( - "embed" - "github.com/steadybit/advice-kit/go/advice_kit_api" - "github.com/steadybit/extension-kit/extbuild" - "github.com/steadybit/extension-kit/extutil" - "github.com/steadybit/extension-kubernetes/extadvice/advice_common" - "github.com/steadybit/extension-kubernetes/extdaemonset" - "github.com/steadybit/extension-kubernetes/extdeployment" - "github.com/steadybit/extension-kubernetes/extstatefulset" -) - -const SingleAWSZoneID = "com.steadybit.extension_kubernetes.advice.single-aws-zone" - -//go:embed * -var SingleAwsZoneContent embed.FS - -func GetAdviceDescriptionSingleAwsZone() advice_kit_api.AdviceDefinition { - return advice_kit_api.AdviceDefinition{ - Id: SingleAWSZoneID, - Label: "Schedule Pods Across AWS Zones", - Version: extbuild.GetSemverVersionStringOrUnknown(), - Icon: "data:image/svg+xml,%3Csvg%20width%3D%2224%22%20height%3D%2224%22%20viewBox%3D%220%200%2024%2024%22%20fill%3D%22none%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%0A%3Cpath%20d%3D%22M11.9436%207.04563C12.1262%206.98477%2012.3235%206.98477%2012.5061%207.04563L17.8407%208.82395C18.2037%208.94498%2018.4486%209.28468%2018.4485%209.66728C18.4485%2010.0499%2018.2036%2010.3895%2017.8405%2010.5105L12.5059%2012.2877C12.3235%2012.3485%2012.1262%2012.3485%2011.9438%2012.2877L6.60918%2010.5105C6.24611%2010.3895%206.00119%2010.0499%206.00116%209.66728C6.00112%209.28468%206.24598%208.94498%206.60902%208.82395L11.9436%207.04563Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3Cpath%20d%3D%22M7.20674%2013.2736C6.68268%2013.0989%206.11622%2013.3821%205.94153%2013.9062C5.76684%2014.4302%206.05007%2014.9967%206.57414%2015.1714L11.9087%2016.9496C12.114%2017.018%2012.336%2017.018%2012.5413%2016.9496L17.8759%2015.1714C18.4%2014.9967%2018.6832%2014.4302%2018.5085%2013.9062C18.3338%2013.3821%2017.7674%2013.0989%2017.2433%2013.2736L12.225%2014.9463L7.20674%2013.2736Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3Cpath%20fill-rule%3D%22evenodd%22%20clip-rule%3D%22evenodd%22%20d%3D%22M11.6491%201.06354C11.8754%200.97882%2012.1246%200.97882%2012.3509%201.06354L22.3506%204.80836C22.7412%204.95463%2023%205.32784%2023%205.74482V18.2552C23%2018.6722%2022.7412%2019.0454%2022.3506%2019.1916L12.3509%2022.9365C12.1246%2023.0212%2011.8754%2023.0212%2011.6491%2022.9365L1.64938%2019.1916C1.2588%2019.0454%201%2018.6722%201%2018.2552V5.74482C1%205.32784%201.2588%204.95463%201.64938%204.80836L11.6491%201.06354ZM3.00047%206.43809V17.5619L12%2020.9321L20.9995%2017.5619V6.43809L12%203.06785L3.00047%206.43809Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3C%2Fsvg%3E%0A", - Tags: &[]string{"kubernetes", "daemonset", "deployment", "statefulset", "aws", "zone"}, - AssessmentQueryApplicable: "(target.type=\"" + extdaemonset.DaemonSetTargetType + "\" OR target.type=\"" + extdeployment.DeploymentTargetType + "\" OR target.type=\"" + extstatefulset.StatefulSetTargetType + "\") AND aws.zone IS PRESENT", - Status: advice_kit_api.AdviceDefinitionStatus{ - ActionNeeded: advice_kit_api.AdviceDefinitionStatusActionNeeded{ - AssessmentQuery: "count(aws.zone) = 1", - Description: advice_kit_api.AdviceDefinitionStatusActionNeededDescription{ - Instruction: advice_common.ReadAdviceFile(SingleAwsZoneContent, "instructions.md"), - Motivation: advice_common.ReadAdviceFile(SingleAwsZoneContent, "motivation.md"), - Summary: advice_common.ReadAdviceFile(SingleAwsZoneContent, "action_needed_summary.md"), - }, - }, - Implemented: advice_kit_api.AdviceDefinitionStatusImplemented{ - Description: advice_kit_api.AdviceDefinitionStatusImplementedDescription{ - Summary: advice_common.ReadAdviceFile(SingleAwsZoneContent, "implemented.md"), - }, - }, - ValidationNeeded: advice_kit_api.AdviceDefinitionStatusValidationNeeded{ - Description: advice_kit_api.AdviceDefinitionStatusValidationNeededDescription{ - Summary: advice_common.ReadAdviceFile(SingleAwsZoneContent, "validation_needed.md"), - }, - Validation: extutil.Ptr([]advice_kit_api.Validation{ - { - Id: "com.steadybit.extension_kubernetes.single-aws-zone.experiment-1", - Type: "EXPERIMENT", - Name: "Availability Zone Outage", - ShortDescription: "When a single AWS availability zone fails, there are still pods of ${target.attr('steadybit.label')} ready to continue providing offered features.", - ExperimentTemplate: extutil.Ptr(advice_kit_api.ExperimentTemplate(advice_common.ReadAdviceFile(SingleAwsZoneContent, "experiment_zone_outage.json.ftl"))), - }, - }), - }, - }, - } -} diff --git a/extadvice/single_aws_zone/validation_needed.md b/extadvice/single_aws_zone/validation_needed.md deleted file mode 100644 index 9e89d2c..0000000 --- a/extadvice/single_aws_zone/validation_needed.md +++ /dev/null @@ -1,2 +0,0 @@ -Right now, your pods are spread across multiple zones. -Now, validate your redundancy by simulating an outage of one zone for *${target.attr('steadybit.label')}*. diff --git a/extadvice/single_azure_zone/action_needed_summary.md b/extadvice/single_azure_zone/action_needed_summary.md deleted file mode 100644 index 2233bbf..0000000 --- a/extadvice/single_azure_zone/action_needed_summary.md +++ /dev/null @@ -1 +0,0 @@ -Right now, when availability zone *${target.attr('azure.zone',0)}* is unavailable, all of *${target.attr('steadybit.label')}*'s Kubernetes pods are unavailable as well. diff --git a/extadvice/single_azure_zone/experiment_zone_outage.json.ftl b/extadvice/single_azure_zone/experiment_zone_outage.json.ftl deleted file mode 100644 index ca4a40b..0000000 --- a/extadvice/single_azure_zone/experiment_zone_outage.json.ftl +++ /dev/null @@ -1,382 +0,0 @@ -{ - "templateTitle": "Azure Zone outage", - "templateDescription": "Check what happens when an Azure Availability Zone is down and validate that Kubernetes manages this accordingly by routing the traffic within expected failure rates so that the offered features still work. As soon as the zone is available again, the pod should be ready again within 60s.", - "placeholders": [ - <#if target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='http'> - { - "key": "httpLoadBalancedEndpoint", - "name": "HTTP Load Balanced Endpoint", - "description": "### What is the URL of a **load-balanced HTTP endpoint** served by the Kubernetes workload?\n\nWe will use the HTTP endpoint to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='k6'> - { - "key": "k6LoadTestFile", - "name": "k6 Load Test File", - "description": "### Specify a k6 load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='jmeter'> - { - "key": "jmeterLoadTestFile", - "name": "JMeter Load Test File", - "description": "### Specify a JMeter load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='gatling'> - { - "key": "gatlingLoadTestFile", - "name": "Gatling Load Test File", - "description": "### Specify a Gatling load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - - ], - "tags": ["Redundancy", "Azure", "Availability Zone", "Advice"], - "experimentName": "Azure Zone Outage of ${target.attr('azure.zone', 0)} for ${target.attr('steadybit.label')}", - "hypothesis": "When Azure Availability Zone ${target.attr('azure.zone', 0)} is down for ${target.attr('steadybit.label')}, Kubernetes manages this accordingly by routing the traffic within expected failure rates so that the offered features still work. As soon as the zone is available again, the pod is ready within 60s.", - "lanes": [ - { - "steps": [ - <#if target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='http'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s", - "headers": [], - "method": "GET", - "successRate": 100, - "maxConcurrent": 5, - "followRedirects": false, - "readTimeout": "5s", - "connectTimeout": "5s", - "requestsPerSecond": 10, - "url": "[[httpLoadBalancedEndpoint]]", - "statusCode": "200-299" - }, - "customLabel": "INVARIANT: ${target.attr('steadybit.label')}'s features work within expected success rates", - "actionType": "com.steadybit.extension_http.check.periodically", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='k6'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "environment": [], - "file": "[[k6LoadTestFile]]" - }, - "actionType": "com.steadybit.extension_k6.run", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='jmeter'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "file": "[[jmeterLoadTestFile]]", - "parameter": [] - }, - "actionType": "com.steadybit.extension_jmeter.run", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='gatling'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "file": "[[gatlingLoadTestFile]]", - "parameter": [] - }, - "actionType": "com.steadybit.extension_gatling.run", - "radius": {} - } - <#else> - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "TODO VALIDATION: INVARIANT: ${target.attr('steadybit.label')}'s features work within expected success rates" - } - - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "20s", - "podCountCheckMode": "podCountEqualsDesiredCount" - }, - "customLabel": "GIVEN: All pods in ${target.attr('azure.zone', 0)} are ready", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "azure.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('azure.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "20s" - }, - "customLabel": "Wait for Azure Zone outage" - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "ip": [], - "port": [], - "duration": "60s", - "hostname": [], - "failOnHostNetwork": true - }, - "customLabel": "WHEN: Zone outage of ${target.attr('azure.zone', 0)} for ${target.attr('steadybit.label')}", - "actionType": "com.steadybit.extension_container.network_blackhole", - "radius": { - "targetType": "com.steadybit.extension_container.container", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "azure.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('azure.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null, - "percentage": 50 - } - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "60s", - "podCountCheckMode": "podCountEqualsDesiredCount" - }, - "customLabel": "THEN: After Zone outage, all pods become ready again within 60s", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "azure.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('azure.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "20s" - }, - "customLabel": "Wait for Azure Zone outage" - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "45s", - "podCountCheckMode": "podCountLessThanDesiredCount" - }, - "customLabel": "THEN: Pods are detected as down", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "azure.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('azure.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "Show Kubernetes events from the cluster", - "actionType": "com.steadybit.extension_kubernetes.kubernetes_logs", - "radius": { - "targetType": "com.steadybit.extension_kubernetes.kubernetes-cluster", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "Show Pod Count Metrics for the cluster", - "actionType": "com.steadybit.extension_kubernetes.pod_count_metric", - "radius": { - "targetType": "com.steadybit.extension_kubernetes.kubernetes-cluster", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - } - ] - }, - "query": null - } - } - ] - } - ] -} diff --git a/extadvice/single_azure_zone/implemented.md b/extadvice/single_azure_zone/implemented.md deleted file mode 100644 index 3471ceb..0000000 --- a/extadvice/single_azure_zone/implemented.md +++ /dev/null @@ -1,2 +0,0 @@ -Right now, when availability zone *${target.attr('azure.zone',0)}* fails, your service *${target.attr('steadybit.label')}* will still be available because you use *${target.attrs('azure.zone')?size}* zones to handle requests. - diff --git a/extadvice/single_azure_zone/instructions.md b/extadvice/single_azure_zone/instructions.md deleted file mode 100644 index 4e05b33..0000000 --- a/extadvice/single_azure_zone/instructions.md +++ /dev/null @@ -1,30 +0,0 @@ -Schedule Kubernetes nodes in different availability zones and configure a `podAntiAffinity` to spread your pods across different zones. - -```yaml -apiVersion: apps/v1 -kind: Deployment -spec: - selector: - matchLabels: - app: example - template: - metadata: - labels: - app: example - spec: -% startHighlight % - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - example - topologyKey: "topology.kubernetes.io/zone" -% endHighlight % - containers: - - name: example - image: images.my-company.example/app:v4 -``` diff --git a/extadvice/single_azure_zone/motivation.md b/extadvice/single_azure_zone/motivation.md deleted file mode 100644 index 9593c11..0000000 --- a/extadvice/single_azure_zone/motivation.md +++ /dev/null @@ -1,2 +0,0 @@ -An availability zone can be unavailable as they are not redundantly designed. -In order to survive an outage of the availability zone *${target.attr('azure.zone',0)}* you should spread your Kubernetes pods across multiple availability zones. diff --git a/extadvice/single_azure_zone/single_azure_zone.go b/extadvice/single_azure_zone/single_azure_zone.go deleted file mode 100644 index f48a936..0000000 --- a/extadvice/single_azure_zone/single_azure_zone.go +++ /dev/null @@ -1,57 +0,0 @@ -package single_azure_zone - -import ( - "embed" - "github.com/steadybit/advice-kit/go/advice_kit_api" - "github.com/steadybit/extension-kit/extbuild" - "github.com/steadybit/extension-kit/extutil" - "github.com/steadybit/extension-kubernetes/extadvice/advice_common" - "github.com/steadybit/extension-kubernetes/extdaemonset" - "github.com/steadybit/extension-kubernetes/extdeployment" - "github.com/steadybit/extension-kubernetes/extstatefulset" -) - -const SingleAzureZoneID = "com.steadybit.extension_kubernetes.advice.single-azure-zone" - -//go:embed * -var SingleAzureZoneContent embed.FS - -func GetAdviceDescriptionSingleAzureZone() advice_kit_api.AdviceDefinition { - return advice_kit_api.AdviceDefinition{ - Id: SingleAzureZoneID, - Label: "Scheduling Pods Across Azure Zones", - Version: extbuild.GetSemverVersionStringOrUnknown(), - Icon: "data:image/svg+xml,%3Csvg%20width%3D%2224%22%20height%3D%2224%22%20viewBox%3D%220%200%2024%2024%22%20fill%3D%22none%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%0A%3Cpath%20d%3D%22M11.9436%207.04563C12.1262%206.98477%2012.3235%206.98477%2012.5061%207.04563L17.8407%208.82395C18.2037%208.94498%2018.4486%209.28468%2018.4485%209.66728C18.4485%2010.0499%2018.2036%2010.3895%2017.8405%2010.5105L12.5059%2012.2877C12.3235%2012.3485%2012.1262%2012.3485%2011.9438%2012.2877L6.60918%2010.5105C6.24611%2010.3895%206.00119%2010.0499%206.00116%209.66728C6.00112%209.28468%206.24598%208.94498%206.60902%208.82395L11.9436%207.04563Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3Cpath%20d%3D%22M7.20674%2013.2736C6.68268%2013.0989%206.11622%2013.3821%205.94153%2013.9062C5.76684%2014.4302%206.05007%2014.9967%206.57414%2015.1714L11.9087%2016.9496C12.114%2017.018%2012.336%2017.018%2012.5413%2016.9496L17.8759%2015.1714C18.4%2014.9967%2018.6832%2014.4302%2018.5085%2013.9062C18.3338%2013.3821%2017.7674%2013.0989%2017.2433%2013.2736L12.225%2014.9463L7.20674%2013.2736Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3Cpath%20fill-rule%3D%22evenodd%22%20clip-rule%3D%22evenodd%22%20d%3D%22M11.6491%201.06354C11.8754%200.97882%2012.1246%200.97882%2012.3509%201.06354L22.3506%204.80836C22.7412%204.95463%2023%205.32784%2023%205.74482V18.2552C23%2018.6722%2022.7412%2019.0454%2022.3506%2019.1916L12.3509%2022.9365C12.1246%2023.0212%2011.8754%2023.0212%2011.6491%2022.9365L1.64938%2019.1916C1.2588%2019.0454%201%2018.6722%201%2018.2552V5.74482C1%205.32784%201.2588%204.95463%201.64938%204.80836L11.6491%201.06354ZM3.00047%206.43809V17.5619L12%2020.9321L20.9995%2017.5619V6.43809L12%203.06785L3.00047%206.43809Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3C%2Fsvg%3E%0A", - Tags: &[]string{"kubernetes", "daemonset", "deployment", "statefulset", "azure", "zone"}, - AssessmentQueryApplicable: "(target.type=\"" + extdaemonset.DaemonSetTargetType + "\" OR target.type=\"" + extdeployment.DeploymentTargetType + "\" OR target.type=\"" + extstatefulset.StatefulSetTargetType + "\") AND azure.zone IS PRESENT", - Status: advice_kit_api.AdviceDefinitionStatus{ - ActionNeeded: advice_kit_api.AdviceDefinitionStatusActionNeeded{ - AssessmentQuery: "count(azure.zone) = 1", - Description: advice_kit_api.AdviceDefinitionStatusActionNeededDescription{ - Instruction: advice_common.ReadAdviceFile(SingleAzureZoneContent, "instructions.md"), - Motivation: advice_common.ReadAdviceFile(SingleAzureZoneContent, "motivation.md"), - Summary: advice_common.ReadAdviceFile(SingleAzureZoneContent, "action_needed_summary.md"), - }, - }, - Implemented: advice_kit_api.AdviceDefinitionStatusImplemented{ - Description: advice_kit_api.AdviceDefinitionStatusImplementedDescription{ - Summary: advice_common.ReadAdviceFile(SingleAzureZoneContent, "implemented.md"), - }, - }, - ValidationNeeded: advice_kit_api.AdviceDefinitionStatusValidationNeeded{ - Description: advice_kit_api.AdviceDefinitionStatusValidationNeededDescription{ - Summary: advice_common.ReadAdviceFile(SingleAzureZoneContent, "validation_needed.md"), - }, - Validation: extutil.Ptr([]advice_kit_api.Validation{ - { - Id: "com.steadybit.extension_kubernetes.single-azure-zone.experiment-1", - Type: "EXPERIMENT", - Name: "Availability Zone Outage", - ShortDescription: "When a single Azure availability zone fails, there are still pods of ${target.attr('steadybit.label')} ready to continue providing offered features.", - ExperimentTemplate: extutil.Ptr(advice_kit_api.ExperimentTemplate(advice_common.ReadAdviceFile(SingleAzureZoneContent, "experiment_zone_outage.json.ftl"))), - }, - }), - }, - }, - } -} diff --git a/extadvice/single_azure_zone/validation_needed.md b/extadvice/single_azure_zone/validation_needed.md deleted file mode 100644 index 9e89d2c..0000000 --- a/extadvice/single_azure_zone/validation_needed.md +++ /dev/null @@ -1,2 +0,0 @@ -Right now, your pods are spread across multiple zones. -Now, validate your redundancy by simulating an outage of one zone for *${target.attr('steadybit.label')}*. diff --git a/extadvice/single_gcp_zone/action_needed_summary.md b/extadvice/single_gcp_zone/action_needed_summary.md deleted file mode 100644 index b45b749..0000000 --- a/extadvice/single_gcp_zone/action_needed_summary.md +++ /dev/null @@ -1 +0,0 @@ -Right now, when availability zone *${target.attr('gcp.zone',0)}* is unavailable, all of *${target.attr('steadybit.label')}*'s Kubernetes pods are unavailable as well. diff --git a/extadvice/single_gcp_zone/experiment_zone_outage.json.ftl b/extadvice/single_gcp_zone/experiment_zone_outage.json.ftl deleted file mode 100644 index 31299df..0000000 --- a/extadvice/single_gcp_zone/experiment_zone_outage.json.ftl +++ /dev/null @@ -1,382 +0,0 @@ -{ - "templateTitle": "GCP Zone outage", - "templateDescription": "Check what happens when a GCP Availability Zone is down and validate that Kubernetes manages this accordingly by routing the traffic within expected failure rates so that the offered features still work. As soon as the zone is available again, the pod should be ready again within 60s.", - "placeholders": [ - <#if target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='http'> - { - "key": "httpLoadBalancedEndpoint", - "name": "HTTP Load Balanced Endpoint", - "description": "### What is the URL of a **load-balanced HTTP endpoint** served by the Kubernetes workload?\n\nWe will use the HTTP endpoint to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='k6'> - { - "key": "k6LoadTestFile", - "name": "k6 Load Test File", - "description": "### Specify a k6 load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='jmeter'> - { - "key": "jmeterLoadTestFile", - "name": "JMeter Load Test File", - "description": "### Specify a JMeter load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='gatling'> - { - "key": "gatlingLoadTestFile", - "name": "Gatling Load Test File", - "description": "### Specify a Gatling load test file to validate the service's functionality.\n\nWe will use the load test to validate that the provided service's features are working fine for the entire experiment duration." - } - - ], - "tags": ["Redundancy", "GCP", "Availability Zone", "Advice"], - "experimentName": "GCP Zone Outage of ${target.attr('gcp.zone', 0)} for ${target.attr('steadybit.label')}", - "hypothesis": "When GCP Availability Zone ${target.attr('gcp.zone', 0)} is down for ${target.attr('steadybit.label')}, Kubernetes manages this accordingly by routing the traffic within expected failure rates so that the offered features still work. As soon as the zone is available again, the pod is ready within 60s.", - "lanes": [ - { - "steps": [ - <#if target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='http'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s", - "headers": [], - "method": "GET", - "successRate": 100, - "maxConcurrent": 5, - "followRedirects": false, - "readTimeout": "5s", - "connectTimeout": "5s", - "requestsPerSecond": 10, - "url": "[[httpLoadBalancedEndpoint]]", - "statusCode": "200-299" - }, - "customLabel": "INVARIANT: ${target.attr('steadybit.label')}'s features work within expected success rates", - "actionType": "com.steadybit.extension_http.check.periodically", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='k6'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "environment": [], - "file": "[[k6LoadTestFile]]" - }, - "actionType": "com.steadybit.extension_k6.run", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='jmeter'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "file": "[[jmeterLoadTestFile]]", - "parameter": [] - }, - "actionType": "com.steadybit.extension_jmeter.run", - "radius": {} - } - <#elseif target.attr('k8s.label.tags.steadybit.com/service-validation')?? && target.attr('k8s.label.tags.steadybit.com/service-validation')=='gatling'> - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "file": "[[gatlingLoadTestFile]]", - "parameter": [] - }, - "actionType": "com.steadybit.extension_gatling.run", - "radius": {} - } - <#else> - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "TODO VALIDATION: INVARIANT: ${target.attr('steadybit.label')}'s features work within expected success rates" - } - - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "20s", - "podCountCheckMode": "podCountEqualsDesiredCount" - }, - "customLabel": "GIVEN: All pods in ${target.attr('gcp.zone', 0)} are ready", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "gcp.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('gcp.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "20s" - }, - "customLabel": "Wait for GCP Zone outage" - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "ip": [], - "port": [], - "duration": "60s", - "hostname": [], - "failOnHostNetwork": true - }, - "customLabel": "WHEN: Zone outage of ${target.attr('gcp.zone', 0)} for ${target.attr('steadybit.label')}", - "actionType": "com.steadybit.extension_container.network_blackhole", - "radius": { - "targetType": "com.steadybit.extension_container.container", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "gcp.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('gcp.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null, - "percentage": 50 - } - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "60s", - "podCountCheckMode": "podCountEqualsDesiredCount" - }, - "customLabel": "THEN: After Zone outage, all pods become ready again within 60s", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "gcp.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('gcp.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "wait", - "ignoreFailure": false, - "parameters": { - "duration": "20s" - }, - "customLabel": "Wait for GCP Zone outage" - }, - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "45s", - "podCountCheckMode": "podCountLessThanDesiredCount" - }, - "customLabel": "THEN: Pods are detected as down", - "actionType": "<#if target.id.type=='com.steadybit.extension_kubernetes.kubernetes-deployment'>com.steadybit.extension_kubernetes.pod_count_check<#elseif target.id.type=='com.steadybit.extension_kubernetes.kubernetes-statefulset'>com.steadybit.extension_kubernetes.pod_count_check_statefulset<#else>com.steadybit.extension_kubernetes.pod_count_check_daemonset", - "radius": { - "targetType": "${target.id.type}", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "gcp.zone", - "operator": "EQUALS", - "values": [ - "${target.attr('gcp.zone', 0)}" - ] - }, - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - }, - { - "key": "k8s.namespace", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.namespace')}" - ] - }, - { - "key": "k8s.${target.attr('k8s.workload-type')}", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.workload-owner')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "Show Kubernetes events from the cluster", - "actionType": "com.steadybit.extension_kubernetes.kubernetes_logs", - "radius": { - "targetType": "com.steadybit.extension_kubernetes.kubernetes-cluster", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - } - ] - }, - "query": null - } - } - ] - }, - { - "steps": [ - { - "type": "action", - "ignoreFailure": false, - "parameters": { - "duration": "140s" - }, - "customLabel": "Show Pod Count Metrics for the cluster", - "actionType": "com.steadybit.extension_kubernetes.pod_count_metric", - "radius": { - "targetType": "com.steadybit.extension_kubernetes.kubernetes-cluster", - "predicate": { - "operator": "AND", - "predicates": [ - { - "key": "k8s.cluster-name", - "operator": "EQUALS", - "values": [ - "${target.attr('k8s.cluster-name')}" - ] - } - ] - }, - "query": null - } - } - ] - } - ] -} diff --git a/extadvice/single_gcp_zone/implemented.md b/extadvice/single_gcp_zone/implemented.md deleted file mode 100644 index 11ebce0..0000000 --- a/extadvice/single_gcp_zone/implemented.md +++ /dev/null @@ -1 +0,0 @@ -Right now, when availability zone *${target.attr('gcp.zone',0)}* fails, your service *${target.attr('steadybit.label')}* will still be available because you use *${target.attrs('gcp.zone')?size}* zones to handle requests. diff --git a/extadvice/single_gcp_zone/instructions.md b/extadvice/single_gcp_zone/instructions.md deleted file mode 100644 index 4e05b33..0000000 --- a/extadvice/single_gcp_zone/instructions.md +++ /dev/null @@ -1,30 +0,0 @@ -Schedule Kubernetes nodes in different availability zones and configure a `podAntiAffinity` to spread your pods across different zones. - -```yaml -apiVersion: apps/v1 -kind: Deployment -spec: - selector: - matchLabels: - app: example - template: - metadata: - labels: - app: example - spec: -% startHighlight % - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - example - topologyKey: "topology.kubernetes.io/zone" -% endHighlight % - containers: - - name: example - image: images.my-company.example/app:v4 -``` diff --git a/extadvice/single_gcp_zone/motivation.md b/extadvice/single_gcp_zone/motivation.md deleted file mode 100644 index d70af73..0000000 --- a/extadvice/single_gcp_zone/motivation.md +++ /dev/null @@ -1,2 +0,0 @@ -An availability zone can be unavailable as they are not redundantly designed. -In order to survive an outage of the availability zone *${target.attr('gcp.zone',0)}* you should spread your Kubernetes pods across multiple availability zones. diff --git a/extadvice/single_gcp_zone/single_gcp_zone.go b/extadvice/single_gcp_zone/single_gcp_zone.go deleted file mode 100644 index ff4b039..0000000 --- a/extadvice/single_gcp_zone/single_gcp_zone.go +++ /dev/null @@ -1,57 +0,0 @@ -package single_gcp_zone - -import ( - "embed" - "github.com/steadybit/advice-kit/go/advice_kit_api" - "github.com/steadybit/extension-kit/extbuild" - "github.com/steadybit/extension-kit/extutil" - "github.com/steadybit/extension-kubernetes/extadvice/advice_common" - "github.com/steadybit/extension-kubernetes/extdaemonset" - "github.com/steadybit/extension-kubernetes/extdeployment" - "github.com/steadybit/extension-kubernetes/extstatefulset" -) - -const SingleGCPZoneID = "com.steadybit.extension_kubernetes.advice.single-gcp-zone" - -//go:embed * -var SingleGcpZoneContent embed.FS - -func GetAdviceDescriptionSingleGcpZone() advice_kit_api.AdviceDefinition { - return advice_kit_api.AdviceDefinition{ - Id: SingleGCPZoneID, - Label: "Schedule Pods Across GCP Zones", - Version: extbuild.GetSemverVersionStringOrUnknown(), - Icon: "data:image/svg+xml,%3Csvg%20width%3D%2224%22%20height%3D%2224%22%20viewBox%3D%220%200%2024%2024%22%20fill%3D%22none%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%0A%3Cpath%20d%3D%22M11.9436%207.04563C12.1262%206.98477%2012.3235%206.98477%2012.5061%207.04563L17.8407%208.82395C18.2037%208.94498%2018.4486%209.28468%2018.4485%209.66728C18.4485%2010.0499%2018.2036%2010.3895%2017.8405%2010.5105L12.5059%2012.2877C12.3235%2012.3485%2012.1262%2012.3485%2011.9438%2012.2877L6.60918%2010.5105C6.24611%2010.3895%206.00119%2010.0499%206.00116%209.66728C6.00112%209.28468%206.24598%208.94498%206.60902%208.82395L11.9436%207.04563Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3Cpath%20d%3D%22M7.20674%2013.2736C6.68268%2013.0989%206.11622%2013.3821%205.94153%2013.9062C5.76684%2014.4302%206.05007%2014.9967%206.57414%2015.1714L11.9087%2016.9496C12.114%2017.018%2012.336%2017.018%2012.5413%2016.9496L17.8759%2015.1714C18.4%2014.9967%2018.6832%2014.4302%2018.5085%2013.9062C18.3338%2013.3821%2017.7674%2013.0989%2017.2433%2013.2736L12.225%2014.9463L7.20674%2013.2736Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3Cpath%20fill-rule%3D%22evenodd%22%20clip-rule%3D%22evenodd%22%20d%3D%22M11.6491%201.06354C11.8754%200.97882%2012.1246%200.97882%2012.3509%201.06354L22.3506%204.80836C22.7412%204.95463%2023%205.32784%2023%205.74482V18.2552C23%2018.6722%2022.7412%2019.0454%2022.3506%2019.1916L12.3509%2022.9365C12.1246%2023.0212%2011.8754%2023.0212%2011.6491%2022.9365L1.64938%2019.1916C1.2588%2019.0454%201%2018.6722%201%2018.2552V5.74482C1%205.32784%201.2588%204.95463%201.64938%204.80836L11.6491%201.06354ZM3.00047%206.43809V17.5619L12%2020.9321L20.9995%2017.5619V6.43809L12%203.06785L3.00047%206.43809Z%22%20fill%3D%22%231D2632%22%2F%3E%0A%3C%2Fsvg%3E%0A", - Tags: &[]string{"kubernetes", "daemonset", "deployment", "statefulset", "gcp", "zone"}, - AssessmentQueryApplicable: "(target.type=\"" + extdaemonset.DaemonSetTargetType + "\" OR target.type=\"" + extdeployment.DeploymentTargetType + "\" OR target.type=\"" + extstatefulset.StatefulSetTargetType + "\") AND gcp.zone IS PRESENT", - Status: advice_kit_api.AdviceDefinitionStatus{ - ActionNeeded: advice_kit_api.AdviceDefinitionStatusActionNeeded{ - AssessmentQuery: "count(gcp.zone) = 1", - Description: advice_kit_api.AdviceDefinitionStatusActionNeededDescription{ - Instruction: advice_common.ReadAdviceFile(SingleGcpZoneContent, "instructions.md"), - Motivation: advice_common.ReadAdviceFile(SingleGcpZoneContent, "motivation.md"), - Summary: advice_common.ReadAdviceFile(SingleGcpZoneContent, "action_needed_summary.md"), - }, - }, - Implemented: advice_kit_api.AdviceDefinitionStatusImplemented{ - Description: advice_kit_api.AdviceDefinitionStatusImplementedDescription{ - Summary: advice_common.ReadAdviceFile(SingleGcpZoneContent, "implemented.md"), - }, - }, - ValidationNeeded: advice_kit_api.AdviceDefinitionStatusValidationNeeded{ - Description: advice_kit_api.AdviceDefinitionStatusValidationNeededDescription{ - Summary: advice_common.ReadAdviceFile(SingleGcpZoneContent, "validation_needed.md"), - }, - Validation: extutil.Ptr([]advice_kit_api.Validation{ - { - Id: "com.steadybit.extension_kubernetes.single-gcp-zone.experiment-1", - Type: "EXPERIMENT", - Name: "Availability Zone Outage", - ShortDescription: "When a single GCP availability zone fails, there are still pods of ${target.attr('steadybit.label')} ready to continue providing offered features.", - ExperimentTemplate: extutil.Ptr(advice_kit_api.ExperimentTemplate(advice_common.ReadAdviceFile(SingleGcpZoneContent, "experiment_zone_outage.json.ftl"))), - }, - }), - }, - }, - } -} diff --git a/extadvice/single_gcp_zone/validation_needed.md b/extadvice/single_gcp_zone/validation_needed.md deleted file mode 100644 index 9e89d2c..0000000 --- a/extadvice/single_gcp_zone/validation_needed.md +++ /dev/null @@ -1,2 +0,0 @@ -Right now, your pods are spread across multiple zones. -Now, validate your redundancy by simulating an outage of one zone for *${target.attr('steadybit.label')}*. diff --git a/main.go b/main.go index 70bc66c..d3aca24 100644 --- a/main.go +++ b/main.go @@ -30,9 +30,6 @@ import ( "github.com/steadybit/extension-kubernetes/extadvice/memory_limit" "github.com/steadybit/extension-kubernetes/extadvice/memory_request" "github.com/steadybit/extension-kubernetes/extadvice/probes" - "github.com/steadybit/extension-kubernetes/extadvice/single_aws_zone" - "github.com/steadybit/extension-kubernetes/extadvice/single_azure_zone" - "github.com/steadybit/extension-kubernetes/extadvice/single_gcp_zone" "github.com/steadybit/extension-kubernetes/extadvice/single_replica" "github.com/steadybit/extension-kubernetes/extadvice/single_zone" "github.com/steadybit/extension-kubernetes/extcluster" @@ -139,9 +136,6 @@ func main() { exthttp.RegisterHttpHandler("/advice/k8s-probes", exthttp.GetterAsHandler(probes.GetAdviceDescriptionProbes)) exthttp.RegisterHttpHandler("/advice/k8s-single-replica", exthttp.GetterAsHandler(single_replica.GetAdviceDescriptionSingleReplica)) exthttp.RegisterHttpHandler("/advice/k8s-host-podantiaffinity", exthttp.GetterAsHandler(host_podantiaffinity.GetAdviceDescriptionHostPodantiaffinity)) - exthttp.RegisterHttpHandler("/advice/single-aws-zone", exthttp.GetterAsHandler(single_aws_zone.GetAdviceDescriptionSingleAwsZone)) - exthttp.RegisterHttpHandler("/advice/single-azure-zone", exthttp.GetterAsHandler(single_azure_zone.GetAdviceDescriptionSingleAzureZone)) - exthttp.RegisterHttpHandler("/advice/single-gcp-zone", exthttp.GetterAsHandler(single_gcp_zone.GetAdviceDescriptionSingleGcpZone)) exthttp.RegisterHttpHandler("/advice/single-zone", exthttp.GetterAsHandler(single_zone.GetAdviceDescriptionSingleZone)) extsignals.ActivateSignalHandlers() @@ -247,24 +241,6 @@ func getAdviceRefs() []advice_kit_api.DescribingEndpointReference { Path: "/advice/k8s-host-podantiaffinity", }) } - if adviceId == "*" || adviceId == single_aws_zone.SingleAWSZoneID { - refs = append(refs, advice_kit_api.DescribingEndpointReference{ - Method: "GET", - Path: "/advice/single-aws-zone", - }) - } - if adviceId == "*" || adviceId == single_azure_zone.SingleAzureZoneID { - refs = append(refs, advice_kit_api.DescribingEndpointReference{ - Method: "GET", - Path: "/advice/single-azure-zone", - }) - } - if adviceId == "*" || adviceId == single_gcp_zone.SingleGCPZoneID { - refs = append(refs, advice_kit_api.DescribingEndpointReference{ - Method: "GET", - Path: "/advice/single-gcp-zone", - }) - } if adviceId == "*" || adviceId == single_zone.SingleZoneID { refs = append(refs, advice_kit_api.DescribingEndpointReference{ Method: "GET",