Skip to content

Commit

Permalink
Secrets-Encryption - restart primary etcd first in a 3 etcd setup (ra…
Browse files Browse the repository at this point in the history
…ncher#194)

* Adding 5 second sleep time for prepare action

* adding debug log

* adding more time

* adding extra loop for restart servers

* adding 30 sec before restarts

* remove unwanted lines

* update for rke2 times

* up time for rke2

* update restart loops for prdt

* change for loop type

* restart primary etcd first

* update review comments

* change condition check

* review fix

* review comment fix

* fix lint

* fix lint

* fix lint

* lint fix

* lint fix

* code order change

* review comment fix

* remove unwanted lines

* fixing review comments

* Update for loop for hashmap wait

* Fixing log lines and time

* rename var
  • Loading branch information
aganesh-suse authored Dec 13, 2024
1 parent 489547e commit 88dd6d8
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 52 deletions.
26 changes: 9 additions & 17 deletions entrypoint/secretsencrypt/encryption_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package secretsencrypt

import (
"flag"
"fmt"
"os"
"strings"
"testing"
Expand Down Expand Up @@ -34,6 +33,8 @@ func TestMain(m *testing.M) {
os.Exit(1)
}

validateSecretsEncryptFlag()

kubeconfig = os.Getenv("KUBE_CONFIG")
if kubeconfig == "" {
// gets a cluster from terraform.
Expand All @@ -51,23 +52,14 @@ func TestSecretsEncryptionSuite(t *testing.T) {
RunSpecs(t, "Secrets Encryption Test Suite")
}

var _ = BeforeSuite(func() {
if cluster.Config.Product == "k3s" {
Expect(os.Getenv("server_flags")).To(ContainSubstring("secrets-encryption:"),
"ERROR: Add secrets-encryption:true to server_flags for this test")
func validateSecretsEncryptFlag() {
if cfg.Product == "k3s" {
if !strings.Contains(os.Getenv("server_flags"), "secrets-encryption:") {
shared.LogLevel("error", "Add secrets-encryption:true to server_flags for this test")
os.Exit(1)
}
}

version := os.Getenv(fmt.Sprintf("%s_version", cluster.Config.Product))

var envErr error
if strings.Contains(version, "1.27") || strings.Contains(version, "1.26") {
envErr = os.Setenv("TEST_TYPE", "classic")
Expect(envErr).To(BeNil(), fmt.Sprintf("error setting env var: %v\n", envErr))
} else {
envErr = os.Setenv("TEST_TYPE", "both")
Expect(envErr).To(BeNil(), fmt.Sprintf("error setting env var: %v\n", envErr))
}
})
}

var _ = ReportAfterSuite("Secrets Encryption Test Suite", func(report Report) {
// Add Qase reporting capabilities.
Expand Down
2 changes: 1 addition & 1 deletion entrypoint/secretsencrypt/encryption_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ var _ = Describe("Test:", func() {
})

It("Validate Secrets Encryption", func() {
testcase.TestSecretsEncryption()
testcase.TestSecretsEncryption(cluster)
})

It("Validate Nodes", func() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/template/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func addTestCaseMap(cluster *shared.Cluster, k8sClient *k8s.Client) map[string]t
testcase.TestCertRotate(cluster)
},
"TestSecretsEncryption": func(applyWorkload, deleteWorkload bool) {
testcase.TestSecretsEncryption()
testcase.TestSecretsEncryption(cluster)
},
"TestRestartService": func(applyWorkload, deleteWorkload bool) {
testcase.TestRestartService(cluster)
Expand Down
85 changes: 52 additions & 33 deletions pkg/testcase/secretsencrypt.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package testcase

import (
"fmt"
"os"
"strings"
"time"

Expand All @@ -11,7 +10,10 @@ import (
. "github.com/onsi/gomega"
)

func TestSecretsEncryption() {
func TestSecretsEncryption(cluster *shared.Cluster) {
Expect(cluster.Status).To(Equal("cluster created"))
Expect(cluster.ServerIPs).ShouldNot(BeEmpty())

nodes, errGetNodes := shared.GetNodesByRoles("etcd", "control-plane")
Expect(nodes).NotTo(BeEmpty())
Expect(errGetNodes).NotTo(HaveOccurred(), "error getting etcd/control-plane nodes")
Expand All @@ -22,63 +24,75 @@ func TestSecretsEncryption() {
errSecret := shared.CreateSecret("secret1", "default")
Expect(errSecret).NotTo(HaveOccurred(), "error creating secret")

shared.LogLevel("info", "TEST: 'CLASSIC' Secrets Encryption method")

index := len(nodes) - 1
cpIp := nodes[index].ExternalIP
secretsEncryptOps("prepare", product, cpIp, nodes)
secretsEncryptOps("rotate", product, cpIp, nodes)
secretsEncryptOps("reencrypt", product, cpIp, nodes)

if strings.Contains(os.Getenv("TEST_TYPE"), "both") {
shared.LogLevel("info", "TEST: 'NEW' Secrets Encryption method")
secretsEncryptOps("rotate-keys", product, cpIp, nodes)
}
shared.LogLevel("info", "TEST: Old Method of Secrets-Encryption")
secretsEncryptOps("prepare", product, cluster.ServerIPs[0], cpIp, nodes)
secretsEncryptOps("rotate", product, cluster.ServerIPs[0], cpIp, nodes)
secretsEncryptOps("reencrypt", product, cluster.ServerIPs[0], cpIp, nodes)
shared.LogLevel("info", "TEST: New Method of Secrets-Encryption")
secretsEncryptOps("rotate-keys", product, cluster.ServerIPs[0], cpIp, nodes)
}

func secretsEncryptOps(action, product, cpIP string, nodes []shared.Node) {
shared.LogLevel("info", "TEST: Secrets-Encryption: %v", action)
func secretsEncryptOps(action, product, primaryNodeIp, cpIP string, nodes []shared.Node) {
shared.LogLevel("info", "TEST: Secrets-Encryption: %v starts", action)

_, errStatusB4 := shared.SecretEncryptOps("status", cpIP, product)
Expect(errStatusB4).NotTo(HaveOccurred(), "error getting secret-encryption status before action")

stdOutput, err := shared.SecretEncryptOps(action, cpIP, product)
Expect(err).NotTo(HaveOccurred(), "error: secret-encryption: %v", action)
verifyActionStdOut(action, stdOutput)

if (action == "reencrypt") || (action == "rotate-keys") {
shared.LogLevel("DEBUG", "reencrypt op needs some time to complete - Sleep for 20 seconds before service restarts")
time.Sleep(20 * time.Second) // Wait for reencrypt action to complete before restarting services.
}

// Restart Primary Etcd Node First
restartServerAndWait(primaryNodeIp, product)

// Restart all other server nodes - etcd and control plane
for _, node := range nodes {
nodearr := []string{node.ExternalIP}
nodeIP, errRestart := shared.ManageService(product, "restart", "server", nodearr)
Expect(errRestart).NotTo(HaveOccurred(), "error restart service for node: "+nodeIP)
// Order of reboot matters. Etcd first then control plane nodes.
// Little lag needed between node restarts to avoid issues.
time.Sleep(30 * time.Second)
waitEtcdErr := shared.WaitForPodsRunning(10, 3)
if waitEtcdErr != nil {
shared.LogLevel("WARN", "pods not up after 30 seconds.")
if node.ExternalIP == primaryNodeIp {
continue
}
restartServerAndWait(node.ExternalIP, product)
}

switch product {
case "k3s":
waitPodsErr := shared.WaitForPodsRunning(10, 3)
if waitPodsErr != nil {
shared.LogLevel("WARN", "pods not up after 30 seconds")
shared.LogLevel("warn", "pods not up after 30 seconds")
}
case "rke2":
waitPodsErr := shared.WaitForPodsRunning(10, 6)
if waitPodsErr != nil {
shared.LogLevel("WARN", "pods not up after 60 seconds")
shared.LogLevel("warn", "pods not up after 60 seconds")
}
}

secretEncryptStatus, errGetStatus := waitForHashMatch(cpIP, product)
Expect(errGetStatus).NotTo(HaveOccurred(), "error getting secret-encryption status")
verifyStatusStdOut(action, secretEncryptStatus)

errLog := logEncryptionFileContents(nodes, product)
errLog := logEncryptionFileContents(nodes, action, product)
Expect(errLog).NotTo(HaveOccurred())
shared.LogLevel("debug", "TEST: Secrets-Encryption: %s is completed", action)
}

// restartServerAndWait restarts the "server" service of the given product on
// the single node identified by ip, then pauses before checking pod status.
//
// The restart is issued through shared.ManageService and asserted to return no
// error. After a fixed 30 second sleep, shared.WaitForPodsRunning(10, 3) is
// called; a non-nil result is only logged as a warning and does not fail the
// caller.
// NOTE(review): the warning text implies the (10, 3) arguments amount to a
// 30 second wait — confirm against shared.WaitForPodsRunning.
func restartServerAndWait(ip, product string) {
	restartedIP, restartErr := shared.ManageService(product, "restart", "server", []string{ip})
	Expect(restartErr).NotTo(HaveOccurred(), "error restart service for node: "+restartedIP)

	// Little lag needed between node restarts to avoid issues.
	shared.LogLevel("debug", "Sleep for 30 seconds before service restarts between servers")
	time.Sleep(30 * time.Second)

	if podsErr := shared.WaitForPodsRunning(10, 3); podsErr != nil {
		shared.LogLevel("warn", "pods not up after 30 seconds.")
	}
}

func waitForHashMatch(cpIP, product string) (string, error) {
Expand All @@ -87,19 +101,19 @@ func waitForHashMatch(cpIP, product string) (string, error) {
times := 6 * 3
var secretEncryptStatus string
var errGetStatus error
for i := 1; i <= times; i++ {
for i := 0; i < times; i++ {
secretEncryptStatus, errGetStatus = shared.SecretEncryptOps("status", cpIP, product)
if errGetStatus != nil {
shared.LogLevel("DEBUG", "error getting secret-encryption status. Retry.")
shared.LogLevel("debug", "error getting secret-encryption status. Retry.")
}
if secretEncryptStatus != "" && strings.Contains(secretEncryptStatus, "All hashes match") {
shared.LogLevel("DEBUG", "Total sleep time before hashes matched: %d seconds", i*int(defaultTime))
shared.LogLevel("debug", "Hash matched after: %d seconds", i*int(defaultTime))

return secretEncryptStatus, nil
}
time.Sleep(defaultTime * time.Second)
}
shared.LogLevel("WARN", "Hashes did not match after %d seconds", times*int(defaultTime))
shared.LogLevel("warn", "Hashes did not match after %d seconds", times*int(defaultTime))

return secretEncryptStatus, errGetStatus
}
Expand Down Expand Up @@ -138,7 +152,7 @@ func verifyStatusStdOut(action, stdout string) {
}
}

func logEncryptionFileContents(nodes []shared.Node, product string) error {
func logEncryptionFileContents(nodes []shared.Node, action, product string) error {
configFile := fmt.Sprintf("/var/lib/rancher/%s/server/cred/encryption-config.json", product)
stateFile := fmt.Sprintf("/var/lib/rancher/%s/server/cred/encryption-state.json", product)
cmdShowConfig := "sudo cat " + configFile
Expand All @@ -150,15 +164,20 @@ func logEncryptionFileContents(nodes []shared.Node, product string) error {
if errConfig != nil {
return shared.ReturnLogError("error cat of %v", configFile)
}
shared.LogLevel("DEBUG", "cat %s:\n %s", configFile, configStdOut)
shared.LogLevel("debug", "cat %s:\n %s", configFile, configStdOut)
currentTime := time.Now()
Expect(configStdOut).To(ContainSubstring("aescbckey-" + currentTime.Format("2006-01-02")))

stateOut, errState := shared.RunCommandOnNode(cmdShowState, ip)
shared.LogLevel("DEBUG", "cat %s:\n %s", stateFile, stateOut)
shared.LogLevel("debug", "cat %s:\n %s", stateFile, stateOut)
if errState != nil {
return shared.ReturnLogError("error cat of %v", stateFile)
}
if (action == "reencrypt") || (action == "rotate-keys") {
Expect(stateOut).To(ContainSubstring("reencrypt_finished"))
} else {
Expect(stateOut).To(ContainSubstring(action))
}
}

return nil
Expand Down
1 change: 1 addition & 0 deletions shared/product.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ func ManageService(product, action, nodeType string, ips []string) (string, erro
}

for _, ip := range ips {
LogLevel("debug", "Performing systemctl %s on %s", action, ip)
cmd, getError := SystemCtlCmd(product, action, nodeType)
if getError != nil {
return ip, getError
Expand Down

0 comments on commit 88dd6d8

Please sign in to comment.