From 8ed47ded76f60aa4aa000bd5d43d7699a3caba0d Mon Sep 17 00:00:00 2001 From: Jigisha Patil <89548848+jigisha620@users.noreply.github.com> Date: Fri, 10 May 2024 07:45:55 -0700 Subject: [PATCH] revert changes made in #1164, #1170 and #1172 (#1243) --- pkg/cloudprovider/types.go | 30 ----------------- .../node/termination/controller.go | 6 ---- .../node/termination/suite_test.go | 24 -------------- .../nodeclaim/termination/controller.go | 6 ---- .../nodeclaim/termination/suite_test.go | 33 ------------------- 5 files changed, 99 deletions(-) diff --git a/pkg/cloudprovider/types.go b/pkg/cloudprovider/types.go index dbc359ead6..2fc8b4720b 100644 --- a/pkg/cloudprovider/types.go +++ b/pkg/cloudprovider/types.go @@ -268,33 +268,3 @@ func IgnoreNodeClassNotReadyError(err error) error { } return err } - -// RetryableError is an error type returned by CloudProviders when the action emitting the error has to be retried -type RetryableError struct { - error -} - -func NewRetryableError(err error) *RetryableError { - return &RetryableError{ - error: err, - } -} - -func (e *RetryableError) Error() string { - return fmt.Sprintf("retryable error, %s", e.error) -} - -func IsRetryableError(err error) bool { - if err == nil { - return false - } - var retryableError *RetryableError - return errors.As(err, &retryableError) -} - -func IgnoreRetryableError(err error) error { - if IsRetryableError(err) { - return nil - } - return err -} diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go index f92a9ed2af..da576a9cd6 100644 --- a/pkg/controllers/node/termination/controller.go +++ b/pkg/controllers/node/termination/controller.go @@ -108,12 +108,6 @@ func (c *Controller) Finalize(ctx context.Context, node *v1.Node) (reconcile.Res // This delete call is needed so that we ensure that we don't remove the node from the cluster // until the full instance shutdown has taken place if err := c.cloudProvider.Delete(ctx, nodeclaimutil.NewFromNode(node)); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil { - // We expect cloudProvider to emit a Retryable Error when the underlying instance is not terminated and if that - // happens, we want to re-enqueue reconciliation until we terminate the underlying instance before removing - // finalizer from the node. - if cloudprovider.IsRetryableError(err) { - return reconcile.Result{RequeueAfter: 10 * time.Second}, nil - } return reconcile.Result{}, fmt.Errorf("terminating cloudprovider instance, %w", err) } if err := c.removeFinalizer(ctx, node); err != nil { diff --git a/pkg/controllers/node/termination/suite_test.go b/pkg/controllers/node/termination/suite_test.go index 3042ffe8ad..577b1ec708 100644 --- a/pkg/controllers/node/termination/suite_test.go +++ b/pkg/controllers/node/termination/suite_test.go @@ -18,7 +18,6 @@ package termination_test import ( "context" - "fmt" "sync" "testing" "time" @@ -26,8 +25,6 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "sigs.k8s.io/karpenter/pkg/cloudprovider" - "github.com/samber/lo" clock "k8s.io/utils/clock/testing" "sigs.k8s.io/controller-runtime/pkg/client" @@ -718,27 +715,6 @@ var _ = Describe("Termination", func() { ExpectMetricGaugeValue(terminator.EvictionQueueDepth, 5, map[string]string{}) }) - It("should retry node deletion when cloudProvider returns retryable error", func() { - ExpectApplied(ctx, env.Client, node) - cloudProvider.NextDeleteErr = cloudprovider.NewRetryableError(fmt.Errorf("underlying instance not terminated")) - - // Expect the node to stick around and requeue while it gets a retryable error - Expect(env.Client.Delete(ctx, node)).To(Succeed()) - result := ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(node)) - Expect(result.RequeueAfter).To(Equal(time.Second * 10)) - - // Instance still exists - _, err := cloudProvider.Get(ctx, node.Spec.ProviderID) - Expect(err).ToNot(HaveOccurred()) - - // Requeue again and we should no longer get an error and succeed to delete the instance - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(node)) // re-enqueue reconciliation since we got retryable error previously - ExpectNotFound(ctx, env.Client, node) - - // Expect the instance to be gone from the cloudprovider - _, err = cloudProvider.Get(ctx, node.Spec.ProviderID) - Expect(cloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue()) - }) }) }) diff --git a/pkg/controllers/nodeclaim/termination/controller.go b/pkg/controllers/nodeclaim/termination/controller.go index ae1cf978be..6e6b4f0d3a 100644 --- a/pkg/controllers/nodeclaim/termination/controller.go +++ b/pkg/controllers/nodeclaim/termination/controller.go @@ -90,12 +90,6 @@ func (c *Controller) Finalize(ctx context.Context, nodeClaim *v1beta1.NodeClaim) } if nodeClaim.Status.ProviderID != "" { if err = c.cloudProvider.Delete(ctx, nodeClaim); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil { - // We expect cloudProvider to emit a Retryable Error when the underlying instance is not terminated and if that - // happens, we want to re-enqueue reconciliation until we terminate the underlying instance before removing - // finalizer from the nodeClaim. - if cloudprovider.IsRetryableError(err) { - return reconcile.Result{RequeueAfter: 10 * time.Second}, nil - } return reconcile.Result{}, fmt.Errorf("terminating cloudprovider instance, %w", err) } } diff --git a/pkg/controllers/nodeclaim/termination/suite_test.go b/pkg/controllers/nodeclaim/termination/suite_test.go index 0d8b29150f..37dc79f5c2 100644 --- a/pkg/controllers/nodeclaim/termination/suite_test.go +++ b/pkg/controllers/nodeclaim/termination/suite_test.go @@ -18,7 +18,6 @@ package termination_test import ( "context" - "fmt" "testing" "time" @@ -217,36 +216,4 @@ var _ = Describe("Termination", func() { ExpectExists(ctx, env.Client, node) } }) - It("should retry nodeClaim deletion when cloudProvider returns retryable error", func() { - ExpectApplied(ctx, env.Client, nodePool, nodeClaim) - ExpectReconcileSucceeded(ctx, nodeClaimLifecycleController, client.ObjectKeyFromObject(nodeClaim)) - - nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - _, err := cloudProvider.Get(ctx, nodeClaim.Status.ProviderID) - Expect(err).ToNot(HaveOccurred()) - - node := test.NodeClaimLinkedNode(nodeClaim) - ExpectApplied(ctx, env.Client, node) - - // Expect the node and the nodeClaim to both be gone - Expect(env.Client.Delete(ctx, nodeClaim)).To(Succeed()) - ExpectReconcileSucceeded(ctx, nodeClaimTerminationController, client.ObjectKeyFromObject(nodeClaim)) // triggers the node deletion - ExpectFinalizersRemoved(ctx, env.Client, node) - ExpectNotFound(ctx, env.Client, node) - - cloudProvider.NextDeleteErr = cloudprovider.NewRetryableError(fmt.Errorf("underlying instance not terminated")) - result := ExpectReconcileSucceeded(ctx, nodeClaimTerminationController, client.ObjectKeyFromObject(nodeClaim)) - Expect(result.RequeueAfter).To(Equal(time.Second * 10)) - - // Instance still exists - _, err = cloudProvider.Get(ctx, node.Spec.ProviderID) - Expect(err).ToNot(HaveOccurred()) - - ExpectReconcileSucceeded(ctx, nodeClaimTerminationController, client.ObjectKeyFromObject(nodeClaim)) //re-enqueue reconciliation since we got retryable error previously - ExpectNotFound(ctx, env.Client, nodeClaim, node) - - // Expect the nodeClaim to be gone from the cloudprovider - _, err = cloudProvider.Get(ctx, nodeClaim.Status.ProviderID) - Expect(cloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue()) - }) })