From 272c48bd4591bcebab4981d905bc4223e004bd37 Mon Sep 17 00:00:00 2001 From: Timo Haas Date: Tue, 27 Dec 2022 20:33:18 +0100 Subject: [PATCH] feat: Add reboot required annotation Signed-off-by: Timo Haas --- cmd/kured/main.go | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 44c8ca9a6..83182a1f0 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -30,13 +30,13 @@ import ( "github.com/google/shlex" shoutrrr "github.com/containrrr/shoutrrr" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/kubereboot/kured/pkg/alerts" "github.com/kubereboot/kured/pkg/daemonsetlock" "github.com/kubereboot/kured/pkg/delaytick" "github.com/kubereboot/kured/pkg/taints" "github.com/kubereboot/kured/pkg/timewindow" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" ) var ( @@ -95,6 +95,8 @@ const ( KuredRebootInProgressAnnotation string = "weave.works/kured-reboot-in-progress" // KuredMostRecentRebootNeededAnnotation is the canonical string value for the kured most-recent-reboot-needed annotation KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed" + // KuredRebootRequiredAnnotation is the canonical string value for the kured reboot-required annotation + KuredRebootRequiredAnnotation string = "weave.works/reboot-required" // EnvPrefix The environment variable prefix of all environment variables bound to our command line flags. 
EnvPrefix = "KURED" ) @@ -566,13 +568,23 @@ func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations } func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) error { + + node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{}) + if err != nil { + log.Errorf("Error deleting node annotation %s via k8s API: %v", key, err) + return err + } + if _, exists := node.Annotations[key]; !exists { + return nil + } + log.Infof("Deleting node %s annotation %s", nodeID, key) // JSON Patch takes as path input a JSON Pointer, defined in RFC6901 // So we replace all instances of "/" with "~1" as per: // https://tools.ietf.org/html/rfc6901#section-3 patch := []byte(fmt.Sprintf("[{\"op\":\"remove\",\"path\":\"/metadata/annotations/%s\"}]", strings.ReplaceAll(key, "/", "~1"))) - _, err := client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{}) + _, err = client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{}) if err != nil { log.Errorf("Error deleting node annotation %s via k8s API: %v", key, err) return err @@ -627,6 +639,12 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s source := rand.NewSource(time.Now().UnixNano()) tick := delaytick.New(source, 1*time.Minute) for range tick { + if !rebootRequired(sentinelCommand) { + err := deleteNodeAnnotation(client, nodeID, KuredRebootRequiredAnnotation) + if err != nil { + continue + } + } if holding(lock, &nodeMeta) { node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{}) if err != nil { @@ -652,11 +670,9 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s // And finally (3) if it has that annotation, to delete it. 
// This indicates to other node tools running on the cluster that this node may be a candidate for maintenance if annotateNodes && !rebootRequired(sentinelCommand) { - if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok { - err := deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation) - if err != nil { - continue - } + err := deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation) + if err != nil { + continue } } throttle(releaseDelay) @@ -696,6 +712,14 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s } log.Infof("Reboot required") + // Annotate this node to indicate that "I want to be rebooted!" + // so that other node maintenance tools can move stateful workloads (in their respective maintenance windows) away + annotations := map[string]string{KuredRebootRequiredAnnotation: "true"} + err := addNodeAnnotations(client, nodeID, annotations) + if err != nil { + continue + } + var blockCheckers []RebootBlocker if prometheusURL != "" { blockCheckers = append(blockCheckers, PrometheusBlockingChecker{promClient: promClient, filter: alertFilter, firingOnly: alertFiringOnly})