diff --git a/docs/testing.md b/docs/testing.md index 9506dd68..a7fb5b12 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -30,7 +30,7 @@ agent1 -> agent/worker node Note/TODO: k3s external db fails working with etcd only node. Refer: https://docs.k3s.io/datastore/ha ### Secret-Encryption tests Setup Requirements/Assumptions -We need a split role setup for this test: +For patch validation test runs, we need a split role setup for this test: 1 Etcd ONLY node 2 Control Plane ONLY node 1 Agent node @@ -47,13 +47,20 @@ etcd_worker_nodes = 0 cp_only_nodes = 2 # control plane only node count cp_worker_nodes = 0 # Numbers 1-6 correspond to: all-roles (1), etcd-only (2), etcd-cp (3), etcd-worker (4), cp-only (5), cp-worker (6). -role_order = "2,5,5" +role_order = "2,5" ``` +Please note, we can also run this test on a regular HA setup - 3 all-roles servers and 1 worker node (without split roles). -Please set the server_flags in .tfvars file: +Please set the server_flags in .tfvars file for k3s: ``` server_flags = "secrets-encryption: true\n" ``` +In case of rke2 - do not leave this empty for now. +At least set a "token: secret\n" for both server and agent flags as a workaround for: +https://github.com/rancher/distros-test-framework/issues/86 + +For versions 1.26 and 1.27 - we run the traditional tests only: prepare/rotate/reencrypt (TEST_TYPE gets set to 'classic' in env var. We use this to determine which tests to run.) +For versions 1.28 and greater - we run both the traditional tests and the new method - rotate-keys (TEST_TYPE gets set to 'both' in env var) Note/TODO: k3s external db fails working with etcd only node.
Refer: https://docs.k3s.io/datastore/ha
diff --git a/entrypoint/secretsencrypt/encryption_suite_test.go b/entrypoint/secretsencrypt/encryption_suite_test.go
index a0d9b659..fb43c7d5 100644
--- a/entrypoint/secretsencrypt/encryption_suite_test.go
+++ b/entrypoint/secretsencrypt/encryption_suite_test.go
@@ -40,9 +40,10 @@ var _ = BeforeSuite(func() {
 	if err := config.SetEnv(shared.BasePath() + fmt.Sprintf("/config/%s.tfvars", cfg.Product)); err != nil {
 		Expect(err).To(BeNil(), fmt.Sprintf("error loading tf vars: %v\n", err))
 	}
-	Expect(os.Getenv("server_flags")).To(ContainSubstring("secrets-encryption:"),
-		"FATAL: Add secrets-encryption:true to server_flags for this test")
-
+	if cfg.Product == "k3s" {
+		Expect(os.Getenv("server_flags")).To(ContainSubstring("secrets-encryption:"),
+			"ERROR: Add secrets-encryption:true to server_flags for this test")
+	}
 	version := os.Getenv(fmt.Sprintf("%s_version", cfg.Product))
 	if strings.Contains(version, "1.27") || strings.Contains(version, "1.26") {
 		os.Setenv("TEST_TYPE", "classic")
diff --git a/entrypoint/secretsencrypt/encryption_test.go b/entrypoint/secretsencrypt/encryption_test.go
index 5d3c4cd8..3c48665b 100644
--- a/entrypoint/secretsencrypt/encryption_test.go
+++ b/entrypoint/secretsencrypt/encryption_test.go
@@ -31,7 +31,7 @@ var _ = Describe("Test:", func() {
 	})
 
 	It("Validate Secrets Encryption", func() {
-		testcase.TestSecretsEncrypt()
+		testcase.TestSecretsEncryption()
 	})
 
 	It("Validate Nodes", func() {
diff --git a/pkg/testcase/secretsencrypt.go b/pkg/testcase/secretsencrypt.go
index 4c7a34fe..420455ee 100644
--- a/pkg/testcase/secretsencrypt.go
+++ b/pkg/testcase/secretsencrypt.go
@@ -11,29 +11,27 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-func TestSecretsEncrypt() {
-	etcdNodes, errGetEtcd := shared.GetNodesByRoles("etcd")
-	Expect(etcdNodes).NotTo(BeEmpty())
-	Expect(errGetEtcd).NotTo(HaveOccurred(), "error getting etcd nodes")
-
-	cpNodes, errGetCP := shared.GetNodesByRoles("control-plane")
-	Expect(cpNodes).NotTo(BeEmpty())
-	Expect(errGetCP).NotTo(HaveOccurred(), "error getting control plane nodes")
+func TestSecretsEncryption() {
+	nodes, errGetNodes := shared.GetNodesByRoles("etcd", "control-plane")
+	Expect(errGetNodes).NotTo(HaveOccurred(), "error getting etcd/control-plane nodes") // assert on the error first so a failed lookup reports its cause
+	Expect(nodes).NotTo(BeEmpty())
 
 	product, err := shared.Product()
 	Expect(err).NotTo(HaveOccurred(), "error getting product from config")
-	ips := getNodeIps(etcdNodes, cpNodes)
+	ips := getNodeIps(nodes)
 
 	errSecret := shared.CreateSecret("secret1", "default")
 	Expect(errSecret).NotTo(HaveOccurred(), "error creating secret")
 
 	shared.LogLevel("INFO", "TEST: 'CLASSIC' Secrets Encryption method")
-	secretsEncryptOps("prepare", product, cpNodes[0].ExternalIP, ips)
-	secretsEncryptOps("rotate", product, cpNodes[0].ExternalIP, ips)
-	secretsEncryptOps("reencrypt", product, cpNodes[0].ExternalIP, ips)
+	index := len(nodes) - 1 // NOTE(review): presumes the last returned node is a control-plane node - confirm GetNodesByRoles ordering
+	secretsEncryptOps("prepare", product, nodes[index].ExternalIP, ips)
+	secretsEncryptOps("rotate", product, nodes[index].ExternalIP, ips)
+	secretsEncryptOps("reencrypt", product, nodes[index].ExternalIP, ips)
+
 	if strings.Contains(os.Getenv("TEST_TYPE"), "both") {
 		shared.LogLevel("INFO", "TEST: 'NEW' Secrets Encryption method")
-		secretsEncryptOps("rotate-keys", product, cpNodes[0].ExternalIP, ips)
+		secretsEncryptOps("rotate-keys", product, nodes[index].ExternalIP, ips)
 	}
 }
 
@@ -46,35 +44,63 @@ func secretsEncryptOps(action, product, cpIp string, ips []string) {
 	Expect(err).NotTo(HaveOccurred(), "error: secret-encryption: "+action)
 	verifyStdOut(action, stdOutput)
 	if (action == "reencrypt") || (action == "rotate-keys") {
-		time.Sleep(20 * time.Second)
+		shared.LogLevel("DEBUG", "reencrypt op needs some time to complete - Sleep for 20 seconds before service restarts")
+		time.Sleep(20 * time.Second) // Wait for reencrypt action to complete before restarting services
 	}
-
-	for _, node := range ips {
+	for i, node := range ips {
 		nodearr := []string{node}
 		nodeIp, errRestart := shared.ManageService(product, "restart", "server", nodearr)
 		Expect(errRestart).NotTo(HaveOccurred(), "error restart service for node: "+nodeIp)
 		// Order of reboot matters. Etcd first then control plane nodes.
 		// Little lag needed between node restarts to avoid issues.
-		shared.LogLevel("INFO", "Sleep 10 seconds - wait before restarting next node in cluster")
-		time.Sleep(10 * time.Second)
+		waitEtcdErr := shared.WaitForPodsRunning(5, 4, false)
+		if waitEtcdErr != nil {
+			shared.LogLevel("WARN", "pods not up after 20 seconds.")
+			if i != len(ips)-1 { // i is 0-based: only announce further restarts when this is not the last node
+				shared.LogLevel("DEBUG", "continue service restarts")
+			}
+		}
 	}
 
 	switch product {
 	case "k3s":
-		shared.LogLevel("INFO", "Sleep 30 seconds - wait for services to come up")
-		time.Sleep(30 * time.Second)
+		waitPodsErr := shared.WaitForPodsRunning(5, 6, false)
+		if waitPodsErr != nil {
+			shared.LogLevel("WARN", "pods not up after 30 seconds")
+		}
 	case "rke2":
-		shared.LogLevel("INFO", "Sleep 60 seconds - wait for services to come up")
-		time.Sleep(60 * time.Second)
+		waitPodsErr := shared.WaitForPodsRunning(5, 12, false)
+		if waitPodsErr != nil {
+			shared.LogLevel("WARN", "pods not up after 60 seconds")
+		}
 	}
-	stdStatusOut, errStatus := shared.SecretEncryptOps("status", cpIp, product)
-	Expect(errStatus).NotTo(HaveOccurred(), "error getting secret-encryption status")
-	verifyStatusOutput(action, stdStatusOut)
+	secretEncryptStatus, errGetStatus := waitForHashMatch(cpIp, product, 5, 36) // Max 3 minute wait time for hash to match
+	Expect(errGetStatus).NotTo(HaveOccurred(), "error getting secret-encryption status")
+	verifyStatusOutput(action, secretEncryptStatus)
 
 	errLog := logEncryptionFileContents(ips,
product)
 	Expect(errLog).NotTo(HaveOccurred())
 }
 
+// waitForHashMatch polls the secrets-encryption status every defaultTime seconds, up to times attempts, until it reports "All hashes match"; on timeout it returns the last status and error seen.
+func waitForHashMatch(cpIp, product string, defaultTime time.Duration, times int) (string, error) {
+	var secretEncryptStatus string
+	var errGetStatus error
+	for i := 1; i <= times; i++ {
+		secretEncryptStatus, errGetStatus = shared.SecretEncryptOps("status", cpIp, product) // plain assignment: := here would shadow the outer vars and make the timeout path return "", nil
+		if errGetStatus != nil {
+			shared.LogLevel("DEBUG", "error getting secret-encryption status. Retry.")
+		}
+		if strings.Contains(secretEncryptStatus, "All hashes match") {
+			shared.LogLevel("DEBUG", "Total sleep time before hashes matched: %d seconds", (i-1)*int(defaultTime)) // i-1 sleeps have happened before the i-th check
+			return secretEncryptStatus, nil
+		}
+		time.Sleep(defaultTime * time.Second)
+	}
+	shared.LogLevel("WARN", "Hashes did not match after %d seconds", times*int(defaultTime))
+	return secretEncryptStatus, errGetStatus
+}
+
 func verifyStdOut(action, stdout string) {
 	switch action {
 	case "prepare":
@@ -109,13 +135,11 @@ func verifyStatusOutput(action, stdout string) {
 	}
 }
 
-func getNodeIps(etcdNodes, cpNodes []shared.Node) []string {
+func getNodeIps(nodes []shared.Node) []string {
 	var nodeIps []string
-	for _, node := range etcdNodes {
-		nodeIps = append(nodeIps, node.ExternalIP)
-	}
-	for _, node := range cpNodes {
+	for _, node := range nodes {
 		nodeIps = append(nodeIps, node.ExternalIP)
+		shared.LogLevel("DEBUG", "Node details: name: %s status: %s roles: %s external ip: %s", node.Name, node.Status, node.Roles, node.ExternalIP)
 	}
 	return nodeIps
 }
@@ -131,10 +155,12 @@ func logEncryptionFileContents(ips []string, product string) error {
 		if errConfig != nil {
 			return shared.ReturnLogError(fmt.Sprintf("Error cat of %s", configFile))
 		}
+		shared.LogLevel("DEBUG", "cat %s:\n %s", configFile, configStdOut)
 
 		currentTime := time.Now()
 		Expect(configStdOut).To(ContainSubstring(fmt.Sprintf("aescbckey-%s", currentTime.Format("2006-01-02"))))
-		_, errState := shared.RunCommandOnNode(cmdShowState, ip)
+		stateOut, errState :=
shared.RunCommandOnNode(cmdShowState, ip)
+		shared.LogLevel("DEBUG", "cat %s:\n %s", stateFile, stateOut)
 		if errState != nil {
 			return shared.ReturnLogError(fmt.Sprintf("Error cat of %s", stateFile))
 		}
diff --git a/shared/cluster.go b/shared/cluster.go
index 584f0e1c..2be1f5c2 100644
--- a/shared/cluster.go
+++ b/shared/cluster.go
@@ -648,5 +648,49 @@ func CreateSecret(secret, namespace string) error {
 	if strings.Contains(createStdOut, "failed to create secret") {
 		return ReturnLogError("failed to create secret: \n%w", err)
 	}
+	LogLevel("DEBUG", "Create Secret Output: %s", createStdOut)
+	return nil
+}
+
+func checkPodStatus(print bool) bool {
+	podsRunningStatus := false
+	pods, errGetPods := GetPods(print)
+	if errGetPods != nil || len(pods) == 0 {
+		LogLevel("DEBUG", "Error getting pods. Retry.")
+		return podsRunningStatus
+	}
+	podReady := 0
+	podNotReady := 0
+	for _, pod := range pods {
+		if pod.Status == "Running" || pod.Status == "Completed" {
+			podReady = podReady + 1
+		} else {
+			podNotReady = podNotReady + 1
+			LogLevel("DEBUG", "Pod Not Ready. Pod details: Name: %s Status: %s", pod.Name, pod.Status)
+		}
+	}
+	if podReady+podNotReady != len(pods) {
+		LogLevel("DEBUG", "Length of pods %d != Ready pods: %d + Not Ready Pods: %d", len(pods), podReady, podNotReady)
+	}
+	if podNotReady == 0 {
+		podsRunningStatus = true
+	}
+	return podsRunningStatus
+}
+
+// WaitForPodsRunning sleeps defaultTime seconds between checks, up to times attempts, and returns an error if the pods are still not all Running/Completed afterwards.
+func WaitForPodsRunning(defaultTime time.Duration, times int, print bool) error {
+	var podsRunning bool
+	for i := 1; i <= times; i++ {
+		time.Sleep(defaultTime * time.Second)
+		podsRunning = checkPodStatus(print)
+		if podsRunning {
+			LogLevel("DEBUG", "All pods are up. Exiting sleep cycle after: %d seconds", i*int(defaultTime))
+			break
+		}
+	}
+	if !podsRunning {
+		return ReturnLogError("All pods were not up at the end of wait period %d", int(defaultTime)*times) // propagate the timeout; previously the error was built and dropped
+	}
 	return nil
 }
diff --git a/shared/product.go b/shared/product.go
index d135bf3b..c254e285 100644
--- a/shared/product.go
+++ b/shared/product.go
@@ -91,10 +91,13 @@ func ManageService(product, action, nodeType string, ips []string) (string, erro
 		if getError != nil {
 			return ip, getError
 		}
-		_, err := RunCommandOnNode(cmd, ip)
+		manageServiceOut, err := RunCommandOnNode(cmd, ip)
 		if err != nil {
 			return ip, err
 		}
+		if manageServiceOut != "" {
+			LogLevel("DEBUG", "service %s output: \n %s", action, manageServiceOut)
+		}
 	}
 
 	return "", nil
@@ -104,10 +107,11 @@ func ManageService(product, action, nodeType string, ips []string) (string, erro
 func CertRotate(product string, ips []string) (string, error) {
 	for _, ip := range ips {
 		cmd := fmt.Sprintf("sudo %s certificate rotate", product)
-		_, err := RunCommandOnNode(cmd, ip)
+		certRotateOut, err := RunCommandOnNode(cmd, ip)
 		if err != nil {
 			return ip, err
 		}
+		LogLevel("DEBUG", "On %s, cert rotate output:\n %s", ip, certRotateOut)
 	}
 
 	return "", nil
@@ -126,11 +130,11 @@ func SecretEncryptOps(action, ip, product string) (string, error) {
 
 	secretsEncryptStdOut, err := RunCommandOnNode(secretEncryptCmd[action], ip)
 	if err != nil {
-		return "", ReturnLogError(fmt.Sprintf("FATAL: secrets-encryption %s action failed", action), err)
+		return "", ReturnLogError(fmt.Sprintf("secrets-encryption %s action failed", action), err)
 	}
 	if strings.Contains(secretsEncryptStdOut, "fatal") {
-		return "", ReturnLogError(fmt.Sprintf("FATAL: secrets-encryption %s action failed", action))
+		return "", ReturnLogError(fmt.Sprintf("secrets-encryption %s action failed", action))
 	}
-
+	LogLevel("DEBUG", "%s output:\n %s", action, secretsEncryptStdOut)
 	return secretsEncryptStdOut, nil
 }