Skip to content

Commit

Permalink
Add debug info collection to functional tests ghwh so when functional…
Browse files Browse the repository at this point in the history
… tests fail for kind clusters the state of the cluster is recorded and uploaded as a zip file artifact of the run of the ghwh
  • Loading branch information
jvoravong committed Dec 18, 2024
1 parent bc73797 commit 8237e89
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 17 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/functional_test_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,25 @@ jobs:
run: |
make cert-manager
- name: run functional tests
id: run-functional-tests
env:
K8S_VERSION: ${{ matrix.k8s-version }}
run: |
cd functional_tests
TEARDOWN_BEFORE_SETUP=true UPDATE_EXPECTED_RESULTS=${{ env.UPDATE_EXPECTED_RESULTS }} go test -v -tags ${{ matrix.test-job }}
- name: Collect Kubernetes Cluster debug info on failure
if: always() && failure()
id: collect-debug-info
run: |
echo "Functional tests failed. Collecting debug info for current state of the Kubernetes cluster..."
./tools/splunk_kubernetes_debug_info.sh
- name: 'Upload Kubernetes Cluster debug info'
if: always() && steps.collect-debug-info.outcome == 'success'
uses: actions/upload-artifact@v4
with:
name: k8s-debug-info-${{ matrix.test-job }}-${{ matrix.k8s-version }}
path: splunk_kubernetes_debug_info*
retention-days: 5
- name: 'Upload test results'
if: always() && env.UPDATE_EXPECTED_RESULTS == 'true'
uses: actions/upload-artifact@v4
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.idea
*.iml
.DS_Store
*splunk_kubernetes_debug_info*

# Helm
**/charts/*.tgz
Expand Down
49 changes: 32 additions & 17 deletions tools/splunk_kubernetes_debug_info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ write_output() {
collect_data_namespace() {
local ns=$1

object_types=("deployments" "daemonsets" "configmaps" "secrets" "networkpolicies" "svc" "ingress" "endpoints" "roles" "rolebindings" "otelinst")
object_types=("deployments" "daemonsets" "configmaps" "secrets" "networkpolicies" "svc" "ingress" "endpoints" "roles" "rolebindings" "otelinst" "jobs" "events")
for type in "${object_types[@]}"; do
stdbuf -oL echo "Collecting $type data for $ns namespace with $k8s_object_name_filter name filter"
if [[ "$type" == "deployment" || "$type" == "daemonset" || "$type" == "configmaps" || "$type" == "secrets" ]]; then
Expand Down Expand Up @@ -201,21 +201,6 @@ collect_data_cluster() {
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_custom_resource_definitions.yaml" "$cmd"

echo "Collecting pod security policies..."
cmd="kubectl get psp -o yaml"
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_pod_security_policies.yaml" "$cmd"

echo "Collecting security context constraints..."
cmd="kubectl get scc -o yaml"
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_security_context_constraints.yaml" "$cmd"

echo "Collecting MutatingWebhookConfiguration objects..."
cmd="kubectl get mutatingwebhookconfiguration.admissionregistration.k8s.io -o yaml; kubectl describe mutatingwebhookconfiguration.admissionregistration.k8s.io; kubectl get --raw /metrics | grep apiserver_admission_webhook_rejection_count;"
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_webhooks.yaml" "$cmd"

echo "Checking for cert-manager installation..."
cert_manager_pods=$(kubectl get pods --all-namespaces -l app=cert-manager --no-headers)
if [ -n "$cert_manager_pods" ]; then
Expand All @@ -233,6 +218,33 @@ collect_data_cluster() {
done
}

#######################################
# Dump every object of a fixed list of cluster-scoped resource types to its
# own YAML file, one file per object.
#
# For each resource type the object names are listed with kubectl; for each
# name the object's apiVersion is looked up (used only to build the file
# name) and the object's YAML is handed to write_output.
#
# Globals:   temp_dir (read) - directory prefix for the generated file names
# Arguments: none
# Outputs:   one progress line per resource type on stdout; files are
#            produced by write_output (defined elsewhere in this script)
# Notes:     if listing a resource type fails or yields no names, the inner
#            loop simply does not run for that type.
#######################################
collect_cluster_resources() {
    # Keep everything function-local so the caller's variables (notably a
    # global named "type" used by other collectors) are not overwritten.
    local -a cluster_object_types=(
        "crds"
        "psp"
        "scc"
        "mutatingwebhookconfiguration.admissionregistration.k8s.io"
        "validatingwebhookconfiguration.admissionregistration.k8s.io"
    )
    local resource_type object api_version cmd output

    for resource_type in "${cluster_object_types[@]}"; do
        echo "Collecting $resource_type cluster-scoped resources..."

        # One object name per line; read them verbatim (no backslash or
        # whitespace mangling).
        kubectl get "$resource_type" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' |
            while IFS= read -r object; do
                # Ignore blank lines so we never query an empty object name.
                [[ -n "$object" ]] || continue

                # apiVersion is only used in the file name; fall back to
                # "unknown" when the lookup fails or returns nothing.
                api_version=$(kubectl get "$resource_type" "$object" -o jsonpath='{.apiVersion}' 2>/dev/null) || api_version=""
                api_version=${api_version:-unknown}
                api_version=${api_version//\//_} # slashes are not valid in a file name

                # $cmd is kept only as the human-readable label passed to
                # write_output; the command itself is run directly with
                # quoted arguments rather than through eval.
                cmd="kubectl get $resource_type $object -o yaml"
                output=$(kubectl get "$resource_type" "$object" -o yaml)
                write_output "$output" "$temp_dir/cluster_${resource_type//./_}_${api_version}_${object}.yaml" "$cmd"
            done
    done
}

# Parse input parameters
namespaces=""
k8s_object_name_filter="splunk|collector|otel|certmanager|test|sck|sock"
Expand Down Expand Up @@ -279,9 +291,12 @@ script_start_time=$(date +"%Y-%m-%d %H:%M:%S")
echo "Script start time: $script_start_time"
echo "Script start time: $script_start_time" >> "$output_file"

# Collect cluster-wide data
# Collect cluster instance specific data
collect_data_cluster

# Collect cluster scoped resources data
collect_cluster_resources

# Function to manage parallel processing of namespaces
collect_data_namespace_namespaces() {
local parallelism=20
Expand Down

0 comments on commit 8237e89

Please sign in to comment.