Skip to content

Commit

Permalink
Refactor proxy reset (#1253)
Browse files Browse the repository at this point in the history
* Initial

* State

* State

* configuration test suite

* State

* State

* State

* State

* State

* State

* State

* One more test

* More tests

* remove stdoutlogger

* State

* Docs + RN

* error on Kyma pod restart warnings

* Do not return error on warnings also do not log it

* Update internal/restarter/predicates/image_resources_test.go

Co-authored-by: Marek Kołodziejczak <[email protected]>

* Review adjustments

* Fix mustMatch and optional predicate case

* Improve logs on controller reconcile terminating for different cases

* Improve logs and requeue in a minuate for the customer sidecar restart error case (warning)

* Update docs/contributor/04-10-technical-design.md

Co-authored-by: Natalia Sitko <[email protected]>

* Update docs/contributor/04-10-technical-design.md

Co-authored-by: Natalia Sitko <[email protected]>

* Update docs/contributor/04-10-technical-design.md

Co-authored-by: Natalia Sitko <[email protected]>

* Update docs/release-notes/1.14.0.md

Co-authored-by: Natalia Sitko <[email protected]>

---------

Co-authored-by: Marek Kołodziejczak <[email protected]>
Co-authored-by: Natalia Sitko <[email protected]>
  • Loading branch information
3 people authored Jan 30, 2025
1 parent f99be84 commit b7c63aa
Show file tree
Hide file tree
Showing 40 changed files with 1,623 additions and 512 deletions.
29 changes: 20 additions & 9 deletions controllers/istio_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@ import (
"time"

"github.com/kyma-project/istio/operator/internal/restarter"
"github.com/kyma-project/istio/operator/internal/restarter/predicates"
"github.com/kyma-project/istio/operator/internal/validation"

"github.com/kyma-project/istio/operator/internal/filter"
"github.com/kyma-project/istio/operator/pkg/lib/sidecars"
"github.com/kyma-project/istio/operator/pkg/lib/sidecars/pods"
"github.com/kyma-project/istio/operator/pkg/lib/sidecars/restart"

"github.com/kyma-project/istio/operator/internal/described_errors"
"github.com/kyma-project/istio/operator/internal/reconciliations/istio/configuration"
"github.com/kyma-project/istio/operator/internal/reconciliations/istio_resources"
"github.com/kyma-project/istio/operator/internal/status"
"k8s.io/client-go/util/retry"
Expand Down Expand Up @@ -56,9 +59,12 @@ func NewController(mgr manager.Manager, reconciliationInterval time.Duration) *I
merger := istiooperator.NewDefaultIstioMerger()

statusHandler := status.NewStatusHandler(mgr.GetClient())
logger := mgr.GetLogger()
podsLister := pods.NewPods(mgr.GetClient(), &logger)
actionRestarter := restart.NewActionRestarter(mgr.GetClient(), &logger)
restarters := []restarter.Restarter{
restarter.NewIngressGatewayRestarter(mgr.GetClient(), []filter.IngressGatewayPredicate{}, statusHandler),
restarter.NewSidecarsRestarter(mgr.GetLogger(), mgr.GetClient(), &merger, sidecars.NewProxyResetter(), statusHandler),
restarter.NewIngressGatewayRestarter(mgr.GetClient(), []predicates.IngressGatewayPredicate{}, statusHandler),
restarter.NewSidecarsRestarter(mgr.GetLogger(), mgr.GetClient(), &merger, sidecars.NewProxyRestarter(mgr.GetClient(), podsLister, actionRestarter, &logger), statusHandler),
}

return &IstioReconciler{
Expand Down Expand Up @@ -153,10 +159,15 @@ func (r *IstioReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
if statusUpdateErr != nil {
r.log.Error(statusUpdateErr, "Error during updating status to error")
}
if err.Level() == described_errors.Warning {
r.log.Info("Reconcile requeued")
return ctrl.Result{RequeueAfter: time.Minute * 1}, nil
}
r.log.Info("Reconcile failed")
return ctrl.Result{}, err
} else if requeue {
r.statusHandler.SetCondition(&istioCR, operatorv1alpha2.NewReasonWithMessage(operatorv1alpha2.ConditionReasonReconcileRequeued))
return r.requeueReconciliationWithoutError(ctx, &istioCR)
return r.requeueReconciliationRestartNotFinished(ctx, &istioCR)
}

return r.finishReconcile(ctx, &istioCR, istioImageVersion.Tag())
Expand All @@ -171,17 +182,17 @@ func (r *IstioReconciler) requeueReconciliation(ctx context.Context, istioCR *op
if statusUpdateErr != nil {
r.log.Error(statusUpdateErr, "Error during updating status to error")
}
r.log.Error(err, "Reconcile failed")
r.log.Info("Reconcile failed")
return ctrl.Result{}, err
}

func (r *IstioReconciler) requeueReconciliationWithoutError(ctx context.Context, istioCR *operatorv1alpha2.Istio) (ctrl.Result, error) {
func (r *IstioReconciler) requeueReconciliationRestartNotFinished(ctx context.Context, istioCR *operatorv1alpha2.Istio) (ctrl.Result, error) {
statusUpdateErr := r.statusHandler.UpdateToProcessing(ctx, istioCR)
if statusUpdateErr != nil {
r.log.Error(statusUpdateErr, "Error during updating status to error")
r.log.Error(statusUpdateErr, "Error during updating status to processing")
}
r.log.Info("Reconcile requeued")
return ctrl.Result{Requeue: true, RequeueAfter: time.Minute * 1}, nil
return ctrl.Result{RequeueAfter: time.Minute * 1}, nil
}

// terminateReconciliation stops the reconciliation and does not requeue the request.
Expand Down Expand Up @@ -276,7 +287,7 @@ func (r *IstioReconciler) updateLastAppliedConfiguration(ctx context.Context, ob
if err := r.Client.Get(ctx, objectKey, &lacIstioCR); err != nil {
return err
}
lastAppliedErr := istio.UpdateLastAppliedConfiguration(&lacIstioCR, istioTag)
lastAppliedErr := configuration.UpdateLastAppliedConfiguration(&lacIstioCR, istioTag)
if lastAppliedErr != nil {
return lastAppliedErr
}
Expand Down
7 changes: 4 additions & 3 deletions controllers/istio_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,9 @@ var _ = Describe("Istio Controller", func() {
result, err := sut.Reconcile(context.Background(), reconcile.Request{NamespacedName: types.NamespacedName{Namespace: testNamespace, Name: istioCrName}})

// then
Expect(err).To(HaveOccurred())
Expect(err).ToNot(HaveOccurred())
Expect(result.Requeue).To(BeFalse())
Expect(result.RequeueAfter).To(Equal(time.Minute * 1))

updatedIstioCR := operatorv1alpha2.Istio{}
err = fakeClient.Get(context.Background(), client.ObjectKeyFromObject(istioCR), &updatedIstioCR)
Expand Down Expand Up @@ -985,7 +986,7 @@ var _ = Describe("Istio Controller", func() {

//then
Expect(err).ToNot(HaveOccurred())
Expect(reconcileResult.Requeue).To(BeTrue())
Expect(reconcileResult.Requeue).To(BeFalse())
Expect(reconcileResult.RequeueAfter).To(Equal(time.Minute * 1))

Expect(ingressGatewayRestarter.RestartCalled()).To(BeTrue())
Expand Down Expand Up @@ -1043,7 +1044,7 @@ type shouldFailClient struct {

func (p *shouldFailClient) List(ctx context.Context, list client.ObjectList, _ ...client.ListOption) error {
if p.FailOnList {
return errors.New("intentionally failing client on list")
return errors.New("intentionally failing client on client.List")
}
return p.Client.List(ctx, list)
}
Expand Down
9 changes: 7 additions & 2 deletions docs/contributor/04-10-technical-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,16 @@ The predicate initiates a restart of the sidecar and Ingress Gateway if the targ
The SidecarsRestarter is responsible for keeping the proxy sidecars in the desired state. It restarts Pods that are in the `Running` state, are part of the service mesh, and have the annotation `sidecar.istio.io/status`.
The Istio CR and the [Istio version](#istio-version) represent the desired state. Pods are restarted in chunks with limits on the number that can be restarted in one reconciliation and the number that can be listed when requesting from the Kubernetes API Server. If the number of Pods that must be restarted exceeds the limits, it happens in the next reconciliation. In such a case, the reconciliation request is requeued with a 1-minute delay to allow time for the Kubernetes scheduler to restart the Deployments.

During the proxy sidecars restarting phase, the Istio CR remains in the `Processing` state having the following status conditions:
Restarting sidecars is divided into two phases:
- In the first phase, only Kyma workloads are restarted. A workload is considered a Kyma workload if it runs in the `kyma-system` namespace or has the `kyma-project.io/module` annotation. All Kyma workloads are restarted without pagination. If there is a problem with the restart, Istio CR is set to the `Error`, and the reconciliation is requeued.
- In the second phase, only customer workloads are restarted. A workload is considered a customer workload if it does not run in the `kyma-system` namespace and does not have the `kyma-project.io/module` annotation. All customer workloads are restarted with pagination. If there is a problem with the restart, Istio CR is set to the `Warning` state, and the reconciliation is requeued with a 1-minute delay.

During the customer sidecars restarting phase, the Istio CR remains in the `Processing` state, having the following status conditions:
- The `Ready` condition is set to `false` with the reason `ReconcileRequeued`.
- The `ProxySidecarRestartSucceeded` condition is set to `false` with the reason `ProxySidecarPartiallySucceeded`.

After completing the customer sidecars restart, Istio CR's `Ready` condition is set to `true`, and `ProxySidecarRestartSucceeded` is set to `true` with the reason `ProxySidecarRestartSucceeded`.

This component covers the following restart triggers:

- Restart Pods with proxy sidecar when CNI config changes.
Expand All @@ -128,4 +134,3 @@ This component covers the following restart triggers:
### IngressGatewayRestarter

IngressGateway Restarter is responsible for restarting Istio Ingress Gateway. The component consumes a list of [Restart Predicates](#restart-predicates) that determine when the restart should occur. Restarter triggers the restart if there's a change in the `numTrustedProxies` configuration.

3 changes: 3 additions & 0 deletions docs/release-notes/1.14.0.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## New Features

- Restarting sidecars is now divided into two phases. In the first phase, only Kyma workloads are restarted. If this phase fails, the Istio Custom Resource (CR) is set to the `Error` state. In the second phase, customer workloads are restarted in chunks. If any iteration of this phase fails, the Istio CR is set to the `Warning` state.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package istio
package configuration

import (
"encoding/json"
"fmt"

"github.com/kyma-project/istio/operator/api/v1alpha2"
"github.com/kyma-project/istio/operator/pkg/labels"

Expand Down Expand Up @@ -51,7 +52,7 @@ func GetLastAppliedConfiguration(istioCR *v1alpha2.Istio) (AppliedConfig, error)
return lastAppliedConfig, nil
}

func checkIstioVersionUpdate(currentIstioVersionString, targetIstioVersionString string) error {
func CheckIstioVersionUpdate(currentIstioVersionString, targetIstioVersionString string) error {
currentIstioVersion, err := semver.NewVersion(currentIstioVersionString)
if err != nil {
return err
Expand Down
88 changes: 88 additions & 0 deletions internal/reconciliations/istio/configuration/configuration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package configuration_test

import (
"fmt"
"testing"

operatorv1alpha2 "github.com/kyma-project/istio/operator/api/v1alpha2"
"github.com/kyma-project/istio/operator/internal/reconciliations/istio/configuration"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"github.com/kyma-project/istio/operator/internal/tests"
"github.com/onsi/ginkgo/v2/types"
)

const (
mockIstioTag string = "1.16.1-distroless"
lastAppliedConfiguration string = "operator.kyma-project.io/lastAppliedConfiguration"
)

func TestRestarter(t *testing.T) {
RegisterFailHandler(Fail)

RunSpecs(t, "Istio Configuration Suite")
}

var _ = ReportAfterSuite("custom reporter", func(report types.Report) {
tests.GenerateGinkgoJunitReport("istio-configuration-suite", report)
})

var _ = Describe("Istio Configuration", func() {
Context("LastAppliedConfiguration", func() {
It("should update lastAppliedConfiguration and is able to unmarshal it back from annotation", func() {
// given
numTrustedProxies := 1
istioCR := operatorv1alpha2.Istio{Spec: operatorv1alpha2.IstioSpec{Config: operatorv1alpha2.Config{NumTrustedProxies: &numTrustedProxies}}}

// when
err := configuration.UpdateLastAppliedConfiguration(&istioCR, mockIstioTag)

// then
Expect(err).ShouldNot(HaveOccurred())
Expect(istioCR.Annotations).To(Not(BeEmpty()))
Expect(istioCR.Annotations[lastAppliedConfiguration]).To(Equal(fmt.Sprintf(`{"config":{"numTrustedProxies":1},"IstioTag":"%s"}`, mockIstioTag)))

appliedConfig, err := configuration.GetLastAppliedConfiguration(&istioCR)

Expect(err).ShouldNot(HaveOccurred())
Expect(*appliedConfig.Config.NumTrustedProxies).To(Equal(1))
})
})

Context("CheckIstioVersionUpdate", func() {
It("should return nil when target version is the same as current version", func() {
err := configuration.CheckIstioVersionUpdate("1.10.0", "1.10.0")
Expect(err).ShouldNot(HaveOccurred())
})

It("should return nil when target version is one minor version higher than current version", func() {
err := configuration.CheckIstioVersionUpdate("1.10.0", "1.11.0")
Expect(err).ShouldNot(HaveOccurred())
})

It("should return error when target version is lower than current version", func() {
err := configuration.CheckIstioVersionUpdate("1.11.0", "1.10.0")
Expect(err).Should(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("downgrade not supported"))
})

It("should return error when target version is more than one minor version higher than current version", func() {
err := configuration.CheckIstioVersionUpdate("1.10.0", "1.12.0")
Expect(err).Should(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("the difference between versions exceed one minor version"))
})

It("should return error when target version has a different major version", func() {
err := configuration.CheckIstioVersionUpdate("1.10.0", "2.10.0")
Expect(err).Should(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("major version upgrade is not supported"))
})

It("should return nil when target version is a pre-release of the same version", func() {
err := configuration.CheckIstioVersionUpdate("1.10.0", "1.10.0-beta.1")
Expect(err).ShouldNot(HaveOccurred())
})
})
})
38 changes: 0 additions & 38 deletions internal/reconciliations/istio/configuration_test.go

This file was deleted.

6 changes: 4 additions & 2 deletions internal/reconciliations/istio/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package istio

import (
"context"

operatorv1alpha2 "github.com/kyma-project/istio/operator/api/v1alpha2"
"github.com/kyma-project/istio/operator/internal/clusterconfig"
"github.com/kyma-project/istio/operator/internal/described_errors"
"github.com/kyma-project/istio/operator/internal/istiooperator"
"github.com/kyma-project/istio/operator/internal/reconciliations/istio/configuration"
"github.com/kyma-project/istio/operator/internal/status"
"github.com/kyma-project/istio/operator/internal/webhooks"
"github.com/kyma-project/istio/operator/pkg/labels"
Expand Down Expand Up @@ -35,13 +37,13 @@ func installIstio(ctx context.Context, args installArgs) (istiooperator.IstioIma
ctrl.Log.Info("Starting Istio install", "istio version", istioImageVersion.Version())

if _, ok := istioCR.Annotations[labels.LastAppliedConfiguration]; ok {
lastAppliedConfig, err := GetLastAppliedConfiguration(istioCR)
lastAppliedConfig, err := configuration.GetLastAppliedConfiguration(istioCR)
if err != nil {
ctrl.Log.Error(err, "Error evaluating Istio CR changes")
return istioImageVersion, described_errors.NewDescribedError(err, "Istio install check failed")
}

if err := checkIstioVersionUpdate(lastAppliedConfig.IstioTag, istioImageVersion.Tag()); err != nil {
if err := configuration.CheckIstioVersionUpdate(lastAppliedConfig.IstioTag, istioImageVersion.Tag()); err != nil {
statusHandler.SetCondition(istioCR, operatorv1alpha2.NewReasonWithMessage(operatorv1alpha2.ConditionReasonIstioVersionUpdateNotAllowed))
// We are already updating the condition, that's why we need to avoid another condition update by applying SetCondition(false)
return istioImageVersion, described_errors.NewDescribedError(err, "Istio version update is not allowed").SetWarning().SetCondition(false)
Expand Down
4 changes: 2 additions & 2 deletions internal/reconciliations/istio/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/kyma-project/istio/operator/internal/istiooperator"
"github.com/kyma-project/istio/operator/internal/resources"
"github.com/kyma-project/istio/operator/internal/status"
sidecarRemover "github.com/kyma-project/istio/operator/pkg/lib/sidecars/remove"
"github.com/kyma-project/istio/operator/pkg/lib/sidecars/remove"
"github.com/thoas/go-funk"

ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -58,7 +58,7 @@ func uninstallIstio(ctx context.Context, args uninstallArgs) (istiooperator.Isti
return istioImageVersion, described_errors.NewDescribedError(err, "Could not uninstall istio")
}

warnings, err := sidecarRemover.RemoveSidecars(ctx, k8sClient, &ctrl.Log)
warnings, err := remove.RemoveSidecars(ctx, k8sClient, &ctrl.Log)
if err != nil {
return istioImageVersion, described_errors.NewDescribedError(err, "Could not remove istio sidecars")
}
Expand Down
9 changes: 4 additions & 5 deletions internal/restarter/ingress_gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ import (

"github.com/kyma-project/istio/operator/api/v1alpha2"
"github.com/kyma-project/istio/operator/internal/described_errors"
"github.com/kyma-project/istio/operator/internal/filter"
"github.com/kyma-project/istio/operator/internal/restarter/predicates"
"github.com/kyma-project/istio/operator/internal/status"
"github.com/kyma-project/istio/operator/pkg/lib/annotations"
"github.com/kyma-project/istio/operator/pkg/lib/ingressgateway"
"github.com/kyma-project/istio/operator/pkg/lib/sidecars/retry"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/types"
Expand All @@ -25,11 +24,11 @@ const (

type IngressGatewayRestarter struct {
client client.Client
predicates []filter.IngressGatewayPredicate
predicates []predicates.IngressGatewayPredicate
statusHandler status.Status
}

func NewIngressGatewayRestarter(client client.Client, predicates []filter.IngressGatewayPredicate, statusHandler status.Status) *IngressGatewayRestarter {
func NewIngressGatewayRestarter(client client.Client, predicates []predicates.IngressGatewayPredicate, statusHandler status.Status) *IngressGatewayRestarter {
return &IngressGatewayRestarter{
client: client,
predicates: predicates,
Expand All @@ -40,7 +39,7 @@ func NewIngressGatewayRestarter(client client.Client, predicates []filter.Ingres
func (r *IngressGatewayRestarter) Restart(ctx context.Context, istioCR *v1alpha2.Istio) (described_errors.DescribedError, bool) {
ctrl.Log.Info("Restarting Istio Ingress Gateway")

r.predicates = append(r.predicates, ingressgateway.NewRestartPredicate(istioCR))
r.predicates = append(r.predicates, predicates.NewIngressGatewayRestartPredicate(istioCR))
for _, predicate := range r.predicates {
evaluator, err := predicate.NewIngressGatewayEvaluator(ctx)
if err != nil {
Expand Down
Loading

0 comments on commit b7c63aa

Please sign in to comment.