From 5dffef9b88fef3ca109c9a2a8cf3c7c85563fe36 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 22 Feb 2022 19:04:44 -0800 Subject: [PATCH 01/40] Add tagging controller configuration --- .../v1/controllers/aws_controllermanager.go | 50 +++++++++++++++++++ .../controllers/tagging/tagging-controller.go | 43 ++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 pkg/providers/v1/controllers/aws_controllermanager.go create mode 100644 pkg/providers/v1/controllers/tagging/tagging-controller.go diff --git a/pkg/providers/v1/controllers/aws_controllermanager.go b/pkg/providers/v1/controllers/aws_controllermanager.go new file mode 100644 index 0000000000..c89c41f18a --- /dev/null +++ b/pkg/providers/v1/controllers/aws_controllermanager.go @@ -0,0 +1,50 @@ +package controllers + +import ( + "context" + "k8s.io/klog/v2" + + cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider/app" + cloudcontrollerconfig "k8s.io/cloud-provider/app/config" + "k8s.io/controller-manager/controller" + genericcontrollermanager "k8s.io/controller-manager/app" + taggingcontroller "k8s.io/cloud-provider-aws/pkg/providers/v1/controllers/tagging" +) + +// BuildControllerInitializers is used to add new controllers built in this package to +// the existing list of controllers from cloud-provider +func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor { + controllerInitializers := app.DefaultInitFuncConstructors + + taggingControllerInitFuncConstrustor := app.ControllerInitFuncConstructor{ + InitContext: app.ControllerInitContext{ + ClientName: "tagging-controller", + }, + Constructor: startTaggingControllerWrapper, + } + + controllerInitializers["tagging"] = taggingControllerInitFuncConstrustor + + return controllerInitializers +} + +// StartCloudNodeControllerWrapper is used to take cloud cofig as input and start cloud node controller +func startTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc { + return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { + return startTaggingController(ctx, initContext, completedConfig, cloud) + } +} + +func startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) { + // Start the CloudNodeController + taggingcontroller, err := taggingcontroller.NewTaggingController() + if err != nil { + klog.Warningf("failed to start tagging controller: %s", err) + return nil, false, nil + } + + go taggingcontroller.Run(ctx.Done()) + + return nil, true, nil +} \ No newline at end of file diff --git a/pkg/providers/v1/controllers/tagging/tagging-controller.go b/pkg/providers/v1/controllers/tagging/tagging-controller.go new file mode 100644 index 0000000000..f63875e901 --- /dev/null +++ b/pkg/providers/v1/controllers/tagging/tagging-controller.go @@ -0,0 +1,43 @@ +/* +Copyright 2016 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tagging + +import ( + "k8s.io/klog/v2" +) + +// eksResourceTagPrefix is the prefix for tag to group resources that are used by eks +// for a particular cluster, this tag is added to the existing tags. +// Example: "Key1=Val1,aws:eks:cluster-name:my-cluster=Val2" +const eksResourceTagPrefix = "aws:eks:cluster-name:" + +// TaggingController is the controller implementation for tagging cluster resources +type TaggingController struct { +} + +// NewTaggingController creates a NewTaggingController object +func NewTaggingController() (*TaggingController, error) { + tc := &TaggingController{ + } + + return tc, nil +} + +// Run will start the controller to tag resources attached to a cluster +// at any point in time +func (tc *TaggingController) Run(stopCh <-chan struct{}) { + klog.Infof("Starting the TaggingController, eksResourceTagPrefix is %s.", eksResourceTagPrefix) + + <-stopCh +} \ No newline at end of file From 1b2dce1bdef35dd11768d54e1b286a1f9ec6e772 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 22 Feb 2022 19:20:08 -0800 Subject: [PATCH 02/40] add log --- pkg/providers/v1/controllers/aws_controllermanager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/providers/v1/controllers/aws_controllermanager.go b/pkg/providers/v1/controllers/aws_controllermanager.go index c89c41f18a..5dabf3f975 100644 --- a/pkg/providers/v1/controllers/aws_controllermanager.go +++ b/pkg/providers/v1/controllers/aws_controllermanager.go @@ -29,7 +29,7 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor return controllerInitializers } -// StartCloudNodeControllerWrapper is used to take cloud cofig as input and start cloud node controller +// StartTaggingControllerWrapper is used to take cloud config as input and start tagging controller func startTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc { return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { return startTaggingController(ctx, initContext, completedConfig, cloud) @@ -37,7 +37,7 @@ func startTaggingControllerWrapper(initContext app.ControllerInitContext, comple } func startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) { - // Start the CloudNodeController + // Start the TaggingController taggingcontroller, err := taggingcontroller.NewTaggingController() if err != nil { klog.Warningf("failed to start tagging controller: %s", err) From 2b79d88e25013f1cb00af8d455abaf7988130f0f Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Feb 2022 10:22:26 -0800 Subject: [PATCH 03/40] rearrange the controllers --- .../controllers/aws_controllermanager.go | 4 ++-- .../controllers/tagging/tagging-controller.go | 0 2 files changed, 2 insertions(+), 2 deletions(-) 
rename {pkg/providers/v1 => cmd/aws-cloud-controller-manager}/controllers/aws_controllermanager.go (94%) rename {pkg/providers/v1 => cmd/aws-cloud-controller-manager}/controllers/tagging/tagging-controller.go (100%) diff --git a/pkg/providers/v1/controllers/aws_controllermanager.go b/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go similarity index 94% rename from pkg/providers/v1/controllers/aws_controllermanager.go rename to cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go index 5dabf3f975..16e88cf54a 100644 --- a/pkg/providers/v1/controllers/aws_controllermanager.go +++ b/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go @@ -5,11 +5,11 @@ import ( "k8s.io/klog/v2" cloudprovider "k8s.io/cloud-provider" + taggingcontroller "k8s.io/cloud-provider-aws/cmd/aws-cloud-controller-manager/controllers/tagging" "k8s.io/cloud-provider/app" cloudcontrollerconfig "k8s.io/cloud-provider/app/config" - "k8s.io/controller-manager/controller" genericcontrollermanager "k8s.io/controller-manager/app" - taggingcontroller "k8s.io/cloud-provider-aws/pkg/providers/v1/controllers/tagging" + "k8s.io/controller-manager/controller" ) // BuildControllerInitializers is used to add new controllers built in this package to diff --git a/pkg/providers/v1/controllers/tagging/tagging-controller.go b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go similarity index 100% rename from pkg/providers/v1/controllers/tagging/tagging-controller.go rename to cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go From 851438b72babf2d28995fa342296ce9e984f3d41 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Feb 2022 16:07:01 -0800 Subject: [PATCH 04/40] remove debugging log --- .../controllers/tagging/tagging-controller.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go index f63875e901..a8256834dd 100644 --- a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go +++ b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go @@ -31,13 +31,15 @@ func NewTaggingController() (*TaggingController, error) { tc := &TaggingController{ } + klog.Infof("Starting the TaggingController") + return tc, nil } // Run will start the controller to tag resources attached to a cluster -// at any point in time +// and untag resources detached from a cluster. 
func (tc *TaggingController) Run(stopCh <-chan struct{}) { - klog.Infof("Starting the TaggingController, eksResourceTagPrefix is %s.", eksResourceTagPrefix) + klog.Infof("Running the TaggingController, eksResourceTagPrefix is %s.", eksResourceTagPrefix) <-stopCh } \ No newline at end of file From a2b68a81718abcfad4f731238a3b03b0ce31cad0 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Feb 2022 17:32:58 -0800 Subject: [PATCH 05/40] removed route controller --- .../controllers/aws_controllermanager.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go b/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go index 16e88cf54a..7808398f72 100644 --- a/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go +++ b/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go @@ -26,6 +26,9 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor controllerInitializers["tagging"] = taggingControllerInitFuncConstrustor + // for testing only, remove when raise a PR + delete(controllerInitializers, "route") + return controllerInitializers } From 93d63f33ce948364abaff2ae88d1a920eb8d2e81 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Feb 2022 19:30:13 -0800 Subject: [PATCH 06/40] added a blank test file for the tagging controller --- .../controllers/aws_controllermanager.go | 5 +---- .../controllers/tagging/tagging-controller-test.go | 14 ++++++++++++++ .../controllers/tagging/tagging-controller.go | 4 ++-- 3 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller-test.go diff --git a/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go b/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go index 7808398f72..6e4ee514ac 100644 --- a/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go +++ b/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go @@ -26,13 +26,10 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor controllerInitializers["tagging"] = taggingControllerInitFuncConstrustor - // for testing only, remove when raise a PR - delete(controllerInitializers, "route") - return controllerInitializers } -// StartTaggingControllerWrapper is used to take cloud config as input and start tagging controller +// StartTaggingControllerWrapper is used to take cloud config as input and start the tagging controller func startTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc { return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { return startTaggingController(ctx, initContext, completedConfig, cloud) diff --git a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller-test.go b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller-test.go new file mode 100644 index 0000000000..d58e8615d4 --- /dev/null +++ b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller-test.go @@ -0,0 +1,14 @@ +/* +Copyright 2016 The Kubernetes Authors. 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tagging \ No newline at end of file diff --git a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go index a8256834dd..8c6e48ada7 100644 --- a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go +++ b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go @@ -15,6 +15,7 @@ package tagging import ( "k8s.io/klog/v2" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" ) // eksResourceTagPrefix is the prefix for tag to group resources that are used by eks @@ -31,14 +32,13 @@ func NewTaggingController() (*TaggingController, error) { tc := &TaggingController{ } - klog.Infof("Starting the TaggingController") - return tc, nil } // Run will start the controller to tag resources attached to a cluster // and untag resources detached from a cluster. func (tc *TaggingController) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() klog.Infof("Running the TaggingController, eksResourceTagPrefix is %s.", eksResourceTagPrefix) <-stopCh From 64e5f6e154094a6804c6ad94bf27cd98953e905e Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Thu, 24 Feb 2022 13:06:24 -0800 Subject: [PATCH 07/40] remove predefined tag --- .../controllers/tagging/tagging-controller.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go index 8c6e48ada7..6e0ef4edcf 100644 --- a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go +++ b/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go @@ -18,11 +18,6 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" ) -// eksResourceTagPrefix is the prefix for tag to group resources that are used by eks -// for a particular cluster, this tag is added to the existing tags. -// Example: "Key1=Val1,aws:eks:cluster-name:my-cluster=Val2" -const eksResourceTagPrefix = "aws:eks:cluster-name:" - // TaggingController is the controller implementation for tagging cluster resources type TaggingController struct { } @@ -39,7 +34,7 @@ func NewTaggingController() (*TaggingController, error) { // and untag resources detached from a cluster. 
func (tc *TaggingController) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() - klog.Infof("Running the TaggingController, eksResourceTagPrefix is %s.", eksResourceTagPrefix) + klog.Infof("Running the TaggingController") <-stopCh } \ No newline at end of file From 9adf41dd9596f4b4335bf092b02e101169ab48d8 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 28 Feb 2022 19:46:55 -0800 Subject: [PATCH 08/40] Refactoring based on recommendations --- .../controllers/aws_controllermanager.go | 11 ++++++++--- .../controllers/tagging/tagging-controller-test.go | 0 .../controllers/tagging/tagging-controller.go | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) rename {cmd/aws-cloud-controller-manager => pkg}/controllers/aws_controllermanager.go (85%) rename {cmd/aws-cloud-controller-manager => pkg}/controllers/tagging/tagging-controller-test.go (100%) rename {cmd/aws-cloud-controller-manager => pkg}/controllers/tagging/tagging-controller.go (100%) diff --git a/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go b/pkg/controllers/aws_controllermanager.go similarity index 85% rename from cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go rename to pkg/controllers/aws_controllermanager.go index 6e4ee514ac..951d873761 100644 --- a/cmd/aws-cloud-controller-manager/controllers/aws_controllermanager.go +++ b/pkg/controllers/aws_controllermanager.go @@ -5,13 +5,18 @@ import ( "k8s.io/klog/v2" cloudprovider "k8s.io/cloud-provider" - taggingcontroller "k8s.io/cloud-provider-aws/cmd/aws-cloud-controller-manager/controllers/tagging" + taggingcontroller "k8s.io/cloud-provider-aws/pkg/controllers/tagging" "k8s.io/cloud-provider/app" cloudcontrollerconfig "k8s.io/cloud-provider/app/config" genericcontrollermanager "k8s.io/controller-manager/app" "k8s.io/controller-manager/controller" ) +const ( + TaggingControllerClientName = "tagging-controller" + TaggingControllerKey = "tagging" +) + // BuildControllerInitializers is used to add new controllers built in this package to // the existing list of controllers from cloud-provider func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor { @@ -19,12 +24,12 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor taggingControllerInitFuncConstrustor := app.ControllerInitFuncConstructor{ InitContext: app.ControllerInitContext{ - ClientName: "tagging-controller", + ClientName: TaggingControllerClientName, }, Constructor: startTaggingControllerWrapper, } - controllerInitializers["tagging"] = taggingControllerInitFuncConstrustor + controllerInitializers[TaggingControllerKey] = taggingControllerInitFuncConstrustor return controllerInitializers } diff --git a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller-test.go b/pkg/controllers/tagging/tagging-controller-test.go similarity index 100% rename from cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller-test.go rename to pkg/controllers/tagging/tagging-controller-test.go diff --git a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go b/pkg/controllers/tagging/tagging-controller.go similarity index 100% rename from cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go rename to pkg/controllers/tagging/tagging-controller.go index 6e0ef4edcf..b090629ea4 100644 --- a/cmd/aws-cloud-controller-manager/controllers/tagging/tagging-controller.go +++ b/pkg/controllers/tagging/tagging-controller.go @@ 
-14,8 +14,8 @@ limitations under the License. package tagging import ( - "k8s.io/klog/v2" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/klog/v2" ) // TaggingController is the controller implementation for tagging cluster resources From 96a052e8d9db273175374cb591d669176da5bf15 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 28 Feb 2022 19:49:11 -0800 Subject: [PATCH 09/40] Sticking to the naming convention --- .../{aws_controllermanager.go => aws_controller_manager.go} | 0 .../tagging/{tagging-controller.go => tagging_controller.go} | 0 .../{tagging-controller-test.go => tagging_controller_test.go} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename pkg/controllers/{aws_controllermanager.go => aws_controller_manager.go} (100%) rename pkg/controllers/tagging/{tagging-controller.go => tagging_controller.go} (100%) rename pkg/controllers/tagging/{tagging-controller-test.go => tagging_controller_test.go} (100%) diff --git a/pkg/controllers/aws_controllermanager.go b/pkg/controllers/aws_controller_manager.go similarity index 100% rename from pkg/controllers/aws_controllermanager.go rename to pkg/controllers/aws_controller_manager.go diff --git a/pkg/controllers/tagging/tagging-controller.go b/pkg/controllers/tagging/tagging_controller.go similarity index 100% rename from pkg/controllers/tagging/tagging-controller.go rename to pkg/controllers/tagging/tagging_controller.go diff --git a/pkg/controllers/tagging/tagging-controller-test.go b/pkg/controllers/tagging/tagging_controller_test.go similarity index 100% rename from pkg/controllers/tagging/tagging-controller-test.go rename to pkg/controllers/tagging/tagging_controller_test.go From a538f54bb6c05b8cb49e1395b89ffdcd813f0cd9 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 8 Mar 2022 15:29:45 -0800 Subject: [PATCH 10/40] address more comments on naming --- pkg/controllers/aws_controller_manager.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/controllers/aws_controller_manager.go b/pkg/controllers/aws_controller_manager.go index 951d873761..8dfb5b80f3 100644 --- a/pkg/controllers/aws_controller_manager.go +++ b/pkg/controllers/aws_controller_manager.go @@ -2,8 +2,6 @@ package controllers import ( "context" - "k8s.io/klog/v2" - cloudprovider "k8s.io/cloud-provider" taggingcontroller "k8s.io/cloud-provider-aws/pkg/controllers/tagging" "k8s.io/cloud-provider/app" @@ -22,14 +20,14 @@ const ( func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor { controllerInitializers := app.DefaultInitFuncConstructors - taggingControllerInitFuncConstrustor := app.ControllerInitFuncConstructor{ + taggingControllerConstructor := app.ControllerInitFuncConstructor{ InitContext: app.ControllerInitContext{ ClientName: TaggingControllerClientName, }, Constructor: startTaggingControllerWrapper, } - controllerInitializers[TaggingControllerKey] = taggingControllerInitFuncConstrustor + controllerInitializers[TaggingControllerKey] = taggingControllerConstructor return controllerInitializers } @@ -52,4 +50,4 @@ func startTaggingController(ctx context.Context, initContext app.ControllerInitC go taggingcontroller.Run(ctx.Done()) return nil, true, nil -} \ No newline at end of file +} From f40691a241d3ad4b65b403db1a3a49cd9b546004 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 14 Mar 2022 19:38:32 -0700 Subject: [PATCH 11/40] Using ListNode 
to get nodes entering and leaving the cluster.

* Remove route controller again
* Print out clusterName for new nodes
* tag new nodes when they come online
* only process a node once
* check taggedNodes size
* add debugging
* use node name as key
* delete k,v from taggedNodes if node no longer exists
* log if delete is done
* Get a list of nodes and tag them if they haven't been tagged
* get instance IDs of the nodes that need tagging
* use MapToAWSInstanceID instead
* restored v1 aws
* restored from master
* tag instance with a random tag
* add klog
* tag and untag node resources
* Prepare for PR
* Initialize nodeMap
---
 pkg/controllers/aws_controller_manager.go     |  26 +++-
 pkg/controllers/tagging/tagging_controller.go | 143 +++++++++++++++++-
 2 files changed, 161 insertions(+), 8 deletions(-)

diff --git a/pkg/controllers/aws_controller_manager.go b/pkg/controllers/aws_controller_manager.go
index 8dfb5b80f3..7702855095 100644
--- a/pkg/controllers/aws_controller_manager.go
+++ b/pkg/controllers/aws_controller_manager.go
@@ -1,3 +1,16 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
 package controllers
 
 import (
 	"context"
@@ -8,6 +21,7 @@ import (
 	cloudcontrollerconfig "k8s.io/cloud-provider/app/config"
 	genericcontrollermanager "k8s.io/controller-manager/app"
 	"k8s.io/controller-manager/controller"
+	"k8s.io/klog/v2"
 )
 
 const (
@@ -29,6 +43,9 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor
 
 	controllerInitializers[TaggingControllerKey] = taggingControllerConstructor
 
+	// TODO: remove the following line to enable the route controller
+	delete(controllerInitializers, "route")
+
 	return controllerInitializers
 }
 
@@ -41,13 +58,18 @@ func startTaggingControllerWrapper(initContext app.ControllerInitContext, comple
 }
 
 func startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
 	// Start the TaggingController
-	taggingcontroller, err := taggingcontroller.NewTaggingController()
+	taggingcontroller, err := taggingcontroller.NewTaggingController(
+		completedConfig.SharedInformers.Core().V1().Nodes(),
+		// cloud node lifecycle controller uses existing cluster role from node-controller
+		completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName),
+		cloud,
+		completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration)
 	if err != nil {
 		klog.Warningf("failed to start tagging controller: %s", err)
 		return nil, false, nil
 	}
 
-	go taggingcontroller.Run(ctx.Done())
+	go taggingcontroller.Run(ctx)
 
 	return nil, true, nil
 }
diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go
index b090629ea4..09aa7852b3 100644
--- a/pkg/controllers/tagging/tagging_controller.go
+++ b/pkg/controllers/tagging/tagging_controller.go
@@ -14,17 +14,54 @@ limitations under the License.
package tagging import ( + "context" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + clientset "k8s.io/client-go/kubernetes" + v1lister "k8s.io/client-go/listers/core/v1" + cloudprovider "k8s.io/cloud-provider" "k8s.io/klog/v2" + "time" ) -// TaggingController is the controller implementation for tagging cluster resources +// TaggingController is the controller implementation for tagging cluster resources. +// It periodically check for Node events (creating/deleting) to apply appropriate +// tags to resources. type TaggingController struct { + kubeClient clientset.Interface + nodeLister v1lister.NodeLister + + cloud cloudprovider.Interface + + // Value controlling TaggingController monitoring period, i.e. how often does TaggingController + // check node list. This value should be lower than nodeMonitorGracePeriod + // set in controller-manager + nodeMonitorPeriod time.Duration + + // A map presenting the node and whether it currently exists + taggedNodes map[string]bool + + // A map representing the nodes that were ever in the cluster + nodeMap map[string]*v1.Node } // NewTaggingController creates a NewTaggingController object -func NewTaggingController() (*TaggingController, error) { +func NewTaggingController( + nodeInformer coreinformers.NodeInformer, + kubeClient clientset.Interface, + cloud cloudprovider.Interface, + nodeMonitorPeriod time.Duration) (*TaggingController, error) { + tc := &TaggingController{ + kubeClient: kubeClient, + nodeLister: nodeInformer.Lister(), + cloud: cloud, + nodeMonitorPeriod: nodeMonitorPeriod, + taggedNodes: make(map[string]bool), + nodeMap: make(map[string]*v1.Node), } return tc, nil @@ -32,9 +69,103 @@ func NewTaggingController() (*TaggingController, error) { // Run will start the controller to tag resources attached to a cluster // and untag resources detached from a cluster. 
-func (tc *TaggingController) Run(stopCh <-chan struct{}) { +func (tc *TaggingController) Run(ctx context.Context) { defer utilruntime.HandleCrash() - klog.Infof("Running the TaggingController") - <-stopCh -} \ No newline at end of file + wait.UntilWithContext(ctx, tc.monitorNodes, tc.nodeMonitorPeriod) +} + +func (tc *TaggingController) monitorNodes(ctx context.Context) { + nodes, err := tc.nodeLister.List(labels.Everything()) + if err != nil { + klog.Errorf("error listing nodes from cache: %s", err) + return + } + + for k := range tc.taggedNodes { + tc.taggedNodes[k] = false + } + + var nodesToTag []*v1.Node + for _, node := range nodes { + if _, ok := tc.taggedNodes[node.GetName()]; !ok { + nodesToTag = append(nodesToTag, node) + } + + tc.nodeMap[node.GetName()] = node + tc.taggedNodes[node.GetName()] = true + } + tc.tagNodesResources(nodesToTag) + + var nodesToUntag []*v1.Node + for nodeName, existed := range tc.taggedNodes { + if existed == false { + nodesToUntag = append(nodesToUntag, tc.nodeMap[nodeName]) + } + } + tc.untagNodeResources(nodesToUntag) + + tc.syncDeletedNodesToTaggedNodes() +} + +// tagNodesResources tag node resources from a list of node +// If we want to tag more resources, modify this function appropriately +func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { + for _, node := range nodes { + klog.Infof("Tagging resources for node %s.", node.GetName()) + } +} + +func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { + for _, node := range nodes { + klog.Infof("Untagging resources for node %s.", node.GetName()) + } +} + +// syncDeletedNodes delete (k, v) from taggedNodes +// if it doesn't exist +func (tc *TaggingController) syncDeletedNodesToTaggedNodes() { + for k, v := range tc.taggedNodes { + if v == false { + delete(tc.taggedNodes, k) + } + } +} + +//// tagEc2Instances applies the provided tags to each EC2 instances in +//// the cluster. 
+//func (tc *TaggingController) tagEc2Instances(nodes []*v1.Node) { +// var instanceIds []*string +// for _, node := range nodes { +// instanceId, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() +// instanceIds = append(instanceIds, aws.String(string(instanceId))) +// } +// +// tc.tagResources(instanceIds) +//} + +//func (tc *TaggingController) tagResources(resourceIds []*string) { +// request := &ec2.CreateTagsInput{ +// Resources: resourceIds, +// Tags: tc.getTagsFromInputs(), +// } +// +// _, error := awsv1.awsSdkEC2.CreateTags(request) +// awsv1.Cloud.TagResoures(request) +// +// if error != nil { +// klog.Errorf("Error occurred trying to tag resources, %s", error) +// } +//} +// +//// Sample function demonstrating that we'll get the tag list from user +//func (tc *TaggingController) getTagsFromInputs() []*ec2.Tag { +// var awsTags []*ec2.Tag +// tag := &ec2.Tag{ +// Key: aws.String("Sample Key"), +// Value: aws.String("Sample value"), +// } +// awsTags = append(awsTags, tag) +// +// return awsTags +//} From 137b089eb7c75b59b26d7d191753f827022c1e46 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 14 Mar 2022 19:48:58 -0700 Subject: [PATCH 12/40] refactor --- pkg/controllers/aws_controller_manager.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/controllers/aws_controller_manager.go b/pkg/controllers/aws_controller_manager.go index 7702855095..782cc6effb 100644 --- a/pkg/controllers/aws_controller_manager.go +++ b/pkg/controllers/aws_controller_manager.go @@ -43,9 +43,6 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor controllerInitializers[TaggingControllerKey] = taggingControllerConstructor - // TODO: remove the following line to enable the route controller - delete(controllerInitializers, "route") - return controllerInitializers } @@ -60,7 +57,6 @@ func startTaggingController(ctx context.Context, initContext app.ControllerInitC // Start the TaggingController taggingcontroller, err := taggingcontroller.NewTaggingController( completedConfig.SharedInformers.Core().V1().Nodes(), - // cloud node lifecycle controller uses existing cluster role from node-controller completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName), cloud, completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration) From 5022b209472443597210601458b3acda7e8bd46b Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Fri, 18 Mar 2022 19:35:07 -0700 Subject: [PATCH 13/40] Add testing and controller config skeletons --- pkg/config/cloud_config.go | 6 + pkg/config/controller_config.go | 56 +++++++ pkg/config/runtime_config.go | 5 + pkg/controllers/aws_controller_manager.go | 18 +++ pkg/controllers/tagging/tagging_controller.go | 99 ++++++------ .../tagging/tagging_controller_test.go | 148 +++++++++++++++++- 6 files changed, 287 insertions(+), 45 deletions(-) create mode 100644 pkg/config/cloud_config.go create mode 100644 pkg/config/controller_config.go create mode 100644 pkg/config/runtime_config.go diff --git a/pkg/config/cloud_config.go b/pkg/config/cloud_config.go new file mode 100644 index 0000000000..8271eb8d16 --- /dev/null +++ b/pkg/config/cloud_config.go @@ -0,0 +1,6 @@ +package config + +type CloudConfig struct { +} + +var CloudCfg = &CloudConfig{} diff --git a/pkg/config/controller_config.go b/pkg/config/controller_config.go new file mode 100644 index 0000000000..c0a0d2eaa0 --- /dev/null +++ 
b/pkg/config/controller_config.go @@ -0,0 +1,56 @@ +package config + +import ( + "flag" + "fmt" + "github.com/spf13/pflag" + "k8s.io/cloud-provider/config" + "os" +) + +const ( + flagResourceTags = "resource-tags" + flagTaggingResources = "tagging-resources" +) + +var ControllerCFG = &ControllerConfig{} + +// ControllerConfig stores the additional flags for global usage +type ControllerConfig struct { + config.KubeCloudSharedConfiguration + ResourceTags string + TaggingResources string + + //RuntimeConfig RuntimeConfig + //CloudConfig *CloudConfig +} + +func (cfg *ControllerConfig) BindFlags(fs *pflag.FlagSet) { + fs.StringVar(&cfg.ResourceTags, flagResourceTags, "", "List of tags for the cluster.") + fs.StringVar(&cfg.TaggingResources, flagTaggingResources, "", "List of EC2 resources that need to be tagged.") +} + +// Validate the controller configuration +func (cfg *ControllerConfig) Validate() error { + if len(cfg.TaggingResources) > 0 && len(cfg.ResourceTags) == 0 { + return fmt.Errorf("--resource-tags must be set when --tagging-resources is not empty.") + } + + return nil +} + +func (cfg *ControllerConfig) LoadControllerConfig() error { + fs := pflag.NewFlagSet("", pflag.ExitOnError) + fs.AddGoFlagSet(flag.CommandLine) + cfg.BindFlags(fs) + + if err := fs.Parse(os.Args); err != nil { + return err + } + + if err := cfg.Validate(); err != nil { + return err + } + + return nil +} diff --git a/pkg/config/runtime_config.go b/pkg/config/runtime_config.go new file mode 100644 index 0000000000..b848f5a468 --- /dev/null +++ b/pkg/config/runtime_config.go @@ -0,0 +1,5 @@ +package config + +// RuntimeConfig stores the configuration for controller-runtime +type RuntimeConfig struct { +} diff --git a/pkg/controllers/aws_controller_manager.go b/pkg/controllers/aws_controller_manager.go index 782cc6effb..0876d808cc 100644 --- a/pkg/controllers/aws_controller_manager.go +++ b/pkg/controllers/aws_controller_manager.go @@ -15,6 +15,7 @@ package controllers import ( "context" + "errors" cloudprovider "k8s.io/cloud-provider" taggingcontroller "k8s.io/cloud-provider-aws/pkg/controllers/tagging" "k8s.io/cloud-provider/app" @@ -43,6 +44,9 @@ func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor controllerInitializers[TaggingControllerKey] = taggingControllerConstructor + // TODO: remove the following line to enable the route controller + delete(controllerInitializers, "route") + return controllerInitializers } @@ -54,6 +58,12 @@ func startTaggingControllerWrapper(initContext app.ControllerInitContext, comple } func startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) { + // TODO: add in validation for user input for new flags + //if ok, error := verifyTaggingControllerUserInput(completedConfig.ComponentConfig.KubeCloudShared.ClusterCIDR); ok { + // klog.Infof("Will not start the tagging controller due to invalid user input, --configure-cloud-routes: %v", error) + // return nil, false, nil + //} + // Start the TaggingController taggingcontroller, err := taggingcontroller.NewTaggingController( completedConfig.SharedInformers.Core().V1().Nodes(), @@ -69,3 +79,11 @@ func startTaggingController(ctx context.Context, initContext app.ControllerInitC return nil, true, nil } + +func verifyTaggingControllerUserInput(input string) (bool, error) { + if len(input) == 0 { + return false, errors.New("Provide inputs for --resource-tags and 
--tagging-resources to use the tagging controller.") + } + + return true, nil +} diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 09aa7852b3..9c9c8aba21 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -15,6 +15,8 @@ package tagging import ( "context" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -23,6 +25,7 @@ import ( clientset "k8s.io/client-go/kubernetes" v1lister "k8s.io/client-go/listers/core/v1" cloudprovider "k8s.io/cloud-provider" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" "time" ) @@ -44,8 +47,14 @@ type TaggingController struct { // A map presenting the node and whether it currently exists taggedNodes map[string]bool - // A map representing the nodes that were ever in the cluster + // A map representing nodes that were part of the cluster at any point in time nodeMap map[string]*v1.Node + + // Representing the user input for tags + tags string + + // Representing the resources to tag + resources string } // NewTaggingController creates a NewTaggingController object @@ -62,8 +71,10 @@ func NewTaggingController( nodeMonitorPeriod: nodeMonitorPeriod, taggedNodes: make(map[string]bool), nodeMap: make(map[string]*v1.Node), + // TODO: add controller configs including the new flags + //tags: conf.ControllerCFG.ResourceTags, + //resources: conf.ControllerCFG.TaggingResources, } - return tc, nil } @@ -72,13 +83,13 @@ func NewTaggingController( func (tc *TaggingController) Run(ctx context.Context) { defer utilruntime.HandleCrash() - wait.UntilWithContext(ctx, tc.monitorNodes, tc.nodeMonitorPeriod) + wait.UntilWithContext(ctx, tc.MonitorNodes, tc.nodeMonitorPeriod) } -func (tc *TaggingController) monitorNodes(ctx context.Context) { +func (tc *TaggingController) MonitorNodes(ctx context.Context) { nodes, err := tc.nodeLister.List(labels.Everything()) if err != nil { - klog.Errorf("error listing nodes from cache: %s", err) + klog.Errorf("error listing nodes: %s", err) return } @@ -112,13 +123,13 @@ func (tc *TaggingController) monitorNodes(ctx context.Context) { // If we want to tag more resources, modify this function appropriately func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { for _, node := range nodes { - klog.Infof("Tagging resources for node %s.", node.GetName()) + klog.Infof("Tagging resources for node %s with %s.", node.GetName(), tc.tags) } } func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { for _, node := range nodes { - klog.Infof("Untagging resources for node %s.", node.GetName()) + klog.Infof("Untagging resources for node %s with %s.", node.GetName(), tc.tags) } } @@ -132,40 +143,40 @@ func (tc *TaggingController) syncDeletedNodesToTaggedNodes() { } } -//// tagEc2Instances applies the provided tags to each EC2 instances in -//// the cluster. 
-//func (tc *TaggingController) tagEc2Instances(nodes []*v1.Node) { -// var instanceIds []*string -// for _, node := range nodes { -// instanceId, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() -// instanceIds = append(instanceIds, aws.String(string(instanceId))) -// } -// -// tc.tagResources(instanceIds) -//} - -//func (tc *TaggingController) tagResources(resourceIds []*string) { -// request := &ec2.CreateTagsInput{ -// Resources: resourceIds, -// Tags: tc.getTagsFromInputs(), -// } -// -// _, error := awsv1.awsSdkEC2.CreateTags(request) -// awsv1.Cloud.TagResoures(request) -// -// if error != nil { -// klog.Errorf("Error occurred trying to tag resources, %s", error) -// } -//} -// -//// Sample function demonstrating that we'll get the tag list from user -//func (tc *TaggingController) getTagsFromInputs() []*ec2.Tag { -// var awsTags []*ec2.Tag -// tag := &ec2.Tag{ -// Key: aws.String("Sample Key"), -// Value: aws.String("Sample value"), -// } -// awsTags = append(awsTags, tag) -// -// return awsTags -//} +// tagEc2Instances applies the provided tags to each EC2 instances in +// the cluster. +func (tc *TaggingController) tagEc2Instances(nodes []*v1.Node) { + var instanceIds []*string + for _, node := range nodes { + instanceId, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() + instanceIds = append(instanceIds, aws.String(string(instanceId))) + } + + tc.tagResources(instanceIds) +} + +// TODO: call EC2 to tag instances +func (tc *TaggingController) tagResources(resourceIds []*string) { + //request := &ec2.CreateTagsInput{ + // Resources: resourceIds, + // Tags: tc.getTagsFromInputs(), + //} + // + //_, error := awsv1..EC2.CreateTags(request) + // + //if error != nil { + // klog.Errorf("Error occurred trying to tag resources, %s", error) + //} +} + +// Sample function demonstrating that we'll get the tag list from user +func (tc *TaggingController) getTagsFromInputs() []*ec2.Tag { + var awsTags []*ec2.Tag + tag := &ec2.Tag{ + Key: aws.String("Sample Key"), + Value: aws.String("Sample value"), + } + awsTags = append(awsTags, tag) + + return awsTags +} diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index d58e8615d4..740b9227e5 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -11,4 +11,150 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package tagging \ No newline at end of file +package tagging + +import ( + "context" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/informers" + coreinformers "k8s.io/client-go/informers/core/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/record" + fakecloud "k8s.io/cloud-provider/fake" + "k8s.io/klog/v2" + "testing" + "time" +) + +func Test_NodesJoining(t *testing.T) { + testcases := []struct { + name string + fakeCloud *fakecloud.Cloud + currNode *v1.Node + taggingController TaggingController + noOfNodes int + }{ + { + name: "node0 joins the cluster.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }, + fakeCloud: &fakecloud.Cloud{ + ExistsByProviderID: false, + }, + taggingController: TaggingController{ + taggedNodes: make(map[string]bool), + nodeMap: make(map[string]*v1.Node), + }, + noOfNodes: 1, + }, + { + name: "node1 joins the cluster, node0 left.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }, + fakeCloud: &fakecloud.Cloud{ + ExistsByProviderID: false, + }, + taggingController: TaggingController{ + taggedNodes: map[string]bool{ + "node0": true, + }, + nodeMap: map[string]*v1.Node{ + "node0": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }, + }, + }, + noOfNodes: 1, + }, + { + name: "node2 joins the cluster, node0 and node1 left.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }, + fakeCloud: &fakecloud.Cloud{ + ExistsByProviderID: false, + }, + taggingController: TaggingController{ + taggedNodes: map[string]bool{ + "node0": true, + "node1": true, + }, + nodeMap: map[string]*v1.Node{ + "node0": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }, + "node1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + }, + }, + }, + noOfNodes: 1, + }, + } + + for _, testcase := range testcases { + t.Run(testcase.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + clientset := fake.NewSimpleClientset(testcase.currNode) + informer := informers.NewSharedInformerFactory(clientset, time.Second) + nodeInformer := informer.Core().V1().Nodes() + + if err := syncNodeStore(nodeInformer, clientset); err != nil { + t.Errorf("unexpected error: %v", err) + } + + eventBroadcaster := record.NewBroadcaster() + testcase.taggingController.nodeLister = nodeInformer.Lister() + testcase.taggingController.kubeClient = clientset + testcase.taggingController.cloud = testcase.fakeCloud + testcase.taggingController.nodeMonitorPeriod = 1 * time.Second + + w := eventBroadcaster.StartLogging(klog.Infof) + defer w.Stop() + + nodeCountBeforeTagging := len(testcase.taggingController.nodeMap) + testcase.taggingController.MonitorNodes(ctx) + + klog.Infof("testcase.taggingController.taggedNodes %s", testcase.taggingController.taggedNodes) + klog.Errorf("testcase.taggingController.nodeMap %s", testcase.taggingController.nodeMap) + + if len(testcase.taggingController.taggedNodes) != testcase.noOfNodes || len(testcase.taggingController.nodeMap) != 
nodeCountBeforeTagging+testcase.noOfNodes { + t.Errorf("taggedNodes must contain %d element(s), and nodeMap must contain %d element(s).", testcase.noOfNodes, nodeCountBeforeTagging+testcase.noOfNodes) + } + }) + } +} + +func syncNodeStore(nodeinformer coreinformers.NodeInformer, f *fake.Clientset) error { + nodes, err := f.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return err + } + newElems := make([]interface{}, 0, len(nodes.Items)) + for i := range nodes.Items { + newElems = append(newElems, &nodes.Items[i]) + } + return nodeinformer.Informer().GetStore().Replace(newElems, "newRV") +} From e091cb4efcc40db0ae08e512a1ef12c7e6b1d051 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 21 Mar 2022 19:41:04 -0700 Subject: [PATCH 14/40] Added tagging and flags mechanisms --- .gitignore | 7 + cmd/aws-cloud-controller-manager/main.go | 1 + pkg/config/cloud_config.go | 6 - pkg/config/controller_config.go | 56 - pkg/config/runtime_config.go | 5 - pkg/controllers/aws_controller_manager.go | 89 - pkg/controllers/options/tagging_controller.go | 22 + pkg/controllers/tagging/tagging_controller.go | 122 +- .../tagging/tagging_controller_wrapper.go | 54 + pkg/providers/v1/aws.go | 5162 +++++++++++++++++ pkg/providers/v1/aws_fakes.go | 716 +++ pkg/providers/v1/tags.go | 357 ++ 12 files changed, 6384 insertions(+), 213 deletions(-) delete mode 100644 pkg/config/cloud_config.go delete mode 100644 pkg/config/controller_config.go delete mode 100644 pkg/config/runtime_config.go delete mode 100644 pkg/controllers/aws_controller_manager.go create mode 100644 pkg/controllers/options/tagging_controller.go create mode 100644 pkg/controllers/tagging/tagging_controller_wrapper.go create mode 100644 pkg/providers/v1/aws.go create mode 100644 pkg/providers/v1/aws_fakes.go create mode 100644 pkg/providers/v1/tags.go diff --git a/.gitignore b/.gitignore index f922b978d1..67d4cdb190 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,9 @@ /aws-cloud-controller-manager /cloudconfig +_output/ +/kops-example +docs/book/_book/ +site/ +.vscode/ +e2e.test +.idea/ diff --git a/cmd/aws-cloud-controller-manager/main.go b/cmd/aws-cloud-controller-manager/main.go index ed4c7ecd1f..ab8dd6f0f0 100644 --- a/cmd/aws-cloud-controller-manager/main.go +++ b/cmd/aws-cloud-controller-manager/main.go @@ -27,6 +27,7 @@ package main import ( "fmt" + "k8s.io/cloud-provider-aws/pkg/controllers/tagging" "math/rand" "net" "net/http" diff --git a/pkg/config/cloud_config.go b/pkg/config/cloud_config.go deleted file mode 100644 index 8271eb8d16..0000000000 --- a/pkg/config/cloud_config.go +++ /dev/null @@ -1,6 +0,0 @@ -package config - -type CloudConfig struct { -} - -var CloudCfg = &CloudConfig{} diff --git a/pkg/config/controller_config.go b/pkg/config/controller_config.go deleted file mode 100644 index c0a0d2eaa0..0000000000 --- a/pkg/config/controller_config.go +++ /dev/null @@ -1,56 +0,0 @@ -package config - -import ( - "flag" - "fmt" - "github.com/spf13/pflag" - "k8s.io/cloud-provider/config" - "os" -) - -const ( - flagResourceTags = "resource-tags" - flagTaggingResources = "tagging-resources" -) - -var ControllerCFG = &ControllerConfig{} - -// ControllerConfig stores the additional flags for global usage -type ControllerConfig struct { - config.KubeCloudSharedConfiguration - ResourceTags string - TaggingResources string - - //RuntimeConfig RuntimeConfig - //CloudConfig *CloudConfig -} - -func (cfg *ControllerConfig) BindFlags(fs *pflag.FlagSet) { - 
fs.StringVar(&cfg.ResourceTags, flagResourceTags, "", "List of tags for the cluster.") - fs.StringVar(&cfg.TaggingResources, flagTaggingResources, "", "List of EC2 resources that need to be tagged.") -} - -// Validate the controller configuration -func (cfg *ControllerConfig) Validate() error { - if len(cfg.TaggingResources) > 0 && len(cfg.ResourceTags) == 0 { - return fmt.Errorf("--resource-tags must be set when --tagging-resources is not empty.") - } - - return nil -} - -func (cfg *ControllerConfig) LoadControllerConfig() error { - fs := pflag.NewFlagSet("", pflag.ExitOnError) - fs.AddGoFlagSet(flag.CommandLine) - cfg.BindFlags(fs) - - if err := fs.Parse(os.Args); err != nil { - return err - } - - if err := cfg.Validate(); err != nil { - return err - } - - return nil -} diff --git a/pkg/config/runtime_config.go b/pkg/config/runtime_config.go deleted file mode 100644 index b848f5a468..0000000000 --- a/pkg/config/runtime_config.go +++ /dev/null @@ -1,5 +0,0 @@ -package config - -// RuntimeConfig stores the configuration for controller-runtime -type RuntimeConfig struct { -} diff --git a/pkg/controllers/aws_controller_manager.go b/pkg/controllers/aws_controller_manager.go deleted file mode 100644 index 0876d808cc..0000000000 --- a/pkg/controllers/aws_controller_manager.go +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package controllers - -import ( - "context" - "errors" - cloudprovider "k8s.io/cloud-provider" - taggingcontroller "k8s.io/cloud-provider-aws/pkg/controllers/tagging" - "k8s.io/cloud-provider/app" - cloudcontrollerconfig "k8s.io/cloud-provider/app/config" - genericcontrollermanager "k8s.io/controller-manager/app" - "k8s.io/controller-manager/controller" - "k8s.io/klog/v2" -) - -const ( - TaggingControllerClientName = "tagging-controller" - TaggingControllerKey = "tagging" -) - -// BuildControllerInitializers is used to add new controllers built in this package to -// the existing list of controllers from cloud-provider -func BuildControllerInitializers() map[string]app.ControllerInitFuncConstructor { - controllerInitializers := app.DefaultInitFuncConstructors - - taggingControllerConstructor := app.ControllerInitFuncConstructor{ - InitContext: app.ControllerInitContext{ - ClientName: TaggingControllerClientName, - }, - Constructor: startTaggingControllerWrapper, - } - - controllerInitializers[TaggingControllerKey] = taggingControllerConstructor - - // TODO: remove the following line to enable the route controller - delete(controllerInitializers, "route") - - return controllerInitializers -} - -// StartTaggingControllerWrapper is used to take cloud config as input and start the tagging controller -func startTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc { - return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { - return startTaggingController(ctx, initContext, completedConfig, cloud) - } -} - -func startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) { - // TODO: add in validation for user input for new flags - //if ok, error := verifyTaggingControllerUserInput(completedConfig.ComponentConfig.KubeCloudShared.ClusterCIDR); ok { - // klog.Infof("Will not start the tagging controller due to invalid user input, --configure-cloud-routes: %v", error) - // return nil, false, nil - //} - - // Start the TaggingController - taggingcontroller, err := taggingcontroller.NewTaggingController( - completedConfig.SharedInformers.Core().V1().Nodes(), - completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName), - cloud, - completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration) - if err != nil { - klog.Warningf("failed to start tagging controller: %s", err) - return nil, false, nil - } - - go taggingcontroller.Run(ctx) - - return nil, true, nil -} - -func verifyTaggingControllerUserInput(input string) (bool, error) { - if len(input) == 0 { - return false, errors.New("Provide inputs for --resource-tags and --tagging-resources to use the tagging controller.") - } - - return true, nil -} diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go new file mode 100644 index 0000000000..e921c3519a --- /dev/null +++ b/pkg/controllers/options/tagging_controller.go @@ -0,0 +1,22 @@ +package options + +import ( + "fmt" + "github.com/spf13/pflag" +) + +type TaggingControllerOptions struct { + Tags map[string]string +} + +func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) { + fs.StringToStringVar(&o.Tags, "tags", o.Tags, "Tags to apply to AWS resources in the tagging controller.") +} + 
+func (o *TaggingControllerOptions) Validate() error { + if len(o.Tags) == 0 { + return fmt.Errorf("--tags must not be empty.") + } + + return nil +} diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 9c9c8aba21..84cdd6646b 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -15,8 +15,7 @@ package tagging import ( "context" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" + "fmt" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -25,6 +24,7 @@ import ( clientset "k8s.io/client-go/kubernetes" v1lister "k8s.io/client-go/listers/core/v1" cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider-aws/pkg/controllers/options" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" "time" @@ -34,10 +34,10 @@ import ( // It periodically check for Node events (creating/deleting) to apply appropriate // tags to resources. type TaggingController struct { - kubeClient clientset.Interface - nodeLister v1lister.NodeLister - - cloud cloudprovider.Interface + controllerOptions options.TaggingControllerOptions + kubeClient clientset.Interface + nodeLister v1lister.NodeLister + cloud *awsv1.Cloud // Value controlling TaggingController monitoring period, i.e. how often does TaggingController // check node list. This value should be lower than nodeMonitorGracePeriod @@ -51,10 +51,10 @@ type TaggingController struct { nodeMap map[string]*v1.Node // Representing the user input for tags - tags string + tags map[string]string // Representing the resources to tag - resources string + resources []string } // NewTaggingController creates a NewTaggingController object @@ -62,18 +62,23 @@ func NewTaggingController( nodeInformer coreinformers.NodeInformer, kubeClient clientset.Interface, cloud cloudprovider.Interface, - nodeMonitorPeriod time.Duration) (*TaggingController, error) { + nodeMonitorPeriod time.Duration, + tags map[string]string) (*TaggingController, error) { + + awsCloud, ok := cloud.(*awsv1.Cloud) + if !ok { + err := fmt.Errorf("tagging controller does not support %v provider", cloud.ProviderName()) + return nil, err + } tc := &TaggingController{ kubeClient: kubeClient, nodeLister: nodeInformer.Lister(), - cloud: cloud, + cloud: awsCloud, nodeMonitorPeriod: nodeMonitorPeriod, taggedNodes: make(map[string]bool), nodeMap: make(map[string]*v1.Node), - // TODO: add controller configs including the new flags - //tags: conf.ControllerCFG.ResourceTags, - //resources: conf.ControllerCFG.TaggingResources, + tags: tags, } return tc, nil } @@ -104,7 +109,6 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { } tc.nodeMap[node.GetName()] = node - tc.taggedNodes[node.GetName()] = true } tc.tagNodesResources(nodesToTag) @@ -115,68 +119,72 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { } } tc.untagNodeResources(nodesToUntag) - - tc.syncDeletedNodesToTaggedNodes() } // tagNodesResources tag node resources from a list of node // If we want to tag more resources, modify this function appropriately func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { for _, node := range nodes { - klog.Infof("Tagging resources for node %s with %s.", node.GetName(), tc.tags) - } -} + nodeTagged := false + nodeTagged = tc.tagEc2Instances(node) -func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { - for _, node := range nodes { - klog.Infof("Untagging 
resources for node %s with %s.", node.GetName(), tc.tags) + if !nodeTagged { + // Node tagged unsuccessfully, remove from the map + // so that we can try later if it still exists + delete(tc.taggedNodes, node.GetName()) + } } } -// syncDeletedNodes delete (k, v) from taggedNodes -// if it doesn't exist -func (tc *TaggingController) syncDeletedNodesToTaggedNodes() { - for k, v := range tc.taggedNodes { - if v == false { - delete(tc.taggedNodes, k) +// tagEc2Instances applies the provided tags to each EC2 instances in +// the cluster. Return if a node is tagged or not +func (tc *TaggingController) tagEc2Instances(node *v1.Node) bool { + instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() + + if err != nil { + klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) + return false + } else { + err := tc.cloud.TagResource(string(instanceId), tc.tags) + + if err != nil { + klog.Errorf("Error in tagging EC2 instance for node %s, error: %v", node.GetName(), err) + return false } } + + return true } -// tagEc2Instances applies the provided tags to each EC2 instances in -// the cluster. -func (tc *TaggingController) tagEc2Instances(nodes []*v1.Node) { - var instanceIds []*string +// untagNodeResources untag node resources from a list of node +// If we want to untag more resources, modify this function appropriately +func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { for _, node := range nodes { - instanceId, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() - instanceIds = append(instanceIds, aws.String(string(instanceId))) - } + nodeUntagged := false + nodeUntagged = tc.untagEc2Instance(node) - tc.tagResources(instanceIds) + if nodeUntagged { + delete(tc.taggedNodes, node.GetName()) + } + } } -// TODO: call EC2 to tag instances -func (tc *TaggingController) tagResources(resourceIds []*string) { - //request := &ec2.CreateTagsInput{ - // Resources: resourceIds, - // Tags: tc.getTagsFromInputs(), - //} - // - //_, error := awsv1..EC2.CreateTags(request) - // - //if error != nil { - // klog.Errorf("Error occurred trying to tag resources, %s", error) - //} -} +// untagEc2Instances deletes the provided tags to each EC2 instances in +// the cluster. 
Return if a node is tagged or not +func (tc *TaggingController) untagEc2Instance(node *v1.Node) bool { + instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() -// Sample function demonstrating that we'll get the tag list from user -func (tc *TaggingController) getTagsFromInputs() []*ec2.Tag { - var awsTags []*ec2.Tag - tag := &ec2.Tag{ - Key: aws.String("Sample Key"), - Value: aws.String("Sample value"), + if err != nil { + klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) + return false + } else { + err := tc.cloud.UntagResource(string(instanceId), tc.tags) + + if err != nil { + klog.Errorf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) + return false + } } - awsTags = append(awsTags, tag) - return awsTags + return true } diff --git a/pkg/controllers/tagging/tagging_controller_wrapper.go b/pkg/controllers/tagging/tagging_controller_wrapper.go new file mode 100644 index 0000000000..c852f333af --- /dev/null +++ b/pkg/controllers/tagging/tagging_controller_wrapper.go @@ -0,0 +1,54 @@ +package tagging + +import ( + "context" + + cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider/app" + cloudcontrollerconfig "k8s.io/cloud-provider/app/config" + genericcontrollermanager "k8s.io/controller-manager/app" + "k8s.io/controller-manager/controller" + "k8s.io/klog/v2" + + "k8s.io/cloud-provider-aws/pkg/controllers/options" +) + +const ( + TaggingControllerClientName = "tagging-controller" + TaggingControllerKey = "tagging" +) + +type TaggingControllerWrapper struct { + Options options.TaggingControllerOptions +} + +// StartTaggingControllerWrapper is used to take cloud config as input and start the tagging controller +func (tc *TaggingControllerWrapper) StartTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc { + return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { + return tc.startTaggingController(ctx, initContext, completedConfig, cloud) + } +} + +func (tc *TaggingControllerWrapper) startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) { + err := tc.Options.Validate() + if err != nil { + klog.Fatal("Tagging controller inputs are not properly set.") + } + + // Start the TaggingController + taggingcontroller, err := NewTaggingController( + completedConfig.SharedInformers.Core().V1().Nodes(), + completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName), + cloud, + completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration, + tc.Options.Tags) + + if err != nil { + klog.Warningf("failed to start tagging controller: %s", err) + return nil, false, nil + } + + go taggingcontroller.Run(ctx) + + return nil, true, nil +} diff --git a/pkg/providers/v1/aws.go b/pkg/providers/v1/aws.go new file mode 100644 index 0000000000..e1fcdb5de6 --- /dev/null +++ b/pkg/providers/v1/aws.go @@ -0,0 +1,5162 @@ +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package aws + +import ( + "context" + "errors" + "fmt" + "io" + "net" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/credentials/stscreds" + "github.com/aws/aws-sdk-go/aws/ec2metadata" + "github.com/aws/aws-sdk-go/aws/endpoints" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/autoscaling" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/aws/aws-sdk-go/service/elb" + "github.com/aws/aws-sdk-go/service/elbv2" + "github.com/aws/aws-sdk-go/service/kms" + "github.com/aws/aws-sdk-go/service/sts" + "gopkg.in/gcfg.v1" + v1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + netutils "k8s.io/utils/net" + + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + informercorev1 "k8s.io/client-go/informers/core/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/pkg/version" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + cloudprovider "k8s.io/cloud-provider" + nodehelpers "k8s.io/cloud-provider/node/helpers" + servicehelpers "k8s.io/cloud-provider/service/helpers" + cloudvolume "k8s.io/cloud-provider/volume" + volerr "k8s.io/cloud-provider/volume/errors" + volumehelpers "k8s.io/cloud-provider/volume/helpers" +) + +// NLBHealthCheckRuleDescription is the comment used on a security group rule to +// indicate that it is used for health checks +const NLBHealthCheckRuleDescription = "kubernetes.io/rule/nlb/health" + +// NLBClientRuleDescription is the comment used on a security group rule to +// indicate that it is used for client traffic +const NLBClientRuleDescription = "kubernetes.io/rule/nlb/client" + +// NLBMtuDiscoveryRuleDescription is the comment used on a security group rule +// to indicate that it is used for mtu discovery +const NLBMtuDiscoveryRuleDescription = "kubernetes.io/rule/nlb/mtu" + +// ProviderName is the name of this cloud provider. +const ProviderName = "aws" + +// TagNameKubernetesService is the tag name we use to differentiate multiple +// services. Used currently for ELBs only. +const TagNameKubernetesService = "kubernetes.io/service-name" + +// TagNameSubnetInternalELB is the tag name used on a subnet to designate that +// it should be used for internal ELBs +const TagNameSubnetInternalELB = "kubernetes.io/role/internal-elb" + +// TagNameSubnetPublicELB is the tag name used on a subnet to designate that +// it should be used for internet ELBs +const TagNameSubnetPublicELB = "kubernetes.io/role/elb" + +// ServiceAnnotationLoadBalancerType is the annotation used on the service +// to indicate what type of Load Balancer we want. 
Right now, the only accepted +// value is "nlb" +const ServiceAnnotationLoadBalancerType = "service.beta.kubernetes.io/aws-load-balancer-type" + +// ServiceAnnotationLoadBalancerInternal is the annotation used on the service +// to indicate that we want an internal ELB. +const ServiceAnnotationLoadBalancerInternal = "service.beta.kubernetes.io/aws-load-balancer-internal" + +// ServiceAnnotationLoadBalancerProxyProtocol is the annotation used on the +// service to enable the proxy protocol on an ELB. Right now we only accept the +// value "*" which means enable the proxy protocol on all ELB backends. In the +// future we could adjust this to allow setting the proxy protocol only on +// certain backends. +const ServiceAnnotationLoadBalancerProxyProtocol = "service.beta.kubernetes.io/aws-load-balancer-proxy-protocol" + +// ServiceAnnotationLoadBalancerAccessLogEmitInterval is the annotation used to +// specify access log emit interval. +const ServiceAnnotationLoadBalancerAccessLogEmitInterval = "service.beta.kubernetes.io/aws-load-balancer-access-log-emit-interval" + +// ServiceAnnotationLoadBalancerAccessLogEnabled is the annotation used on the +// service to enable or disable access logs. +const ServiceAnnotationLoadBalancerAccessLogEnabled = "service.beta.kubernetes.io/aws-load-balancer-access-log-enabled" + +// ServiceAnnotationLoadBalancerAccessLogS3BucketName is the annotation used to +// specify access log s3 bucket name. +const ServiceAnnotationLoadBalancerAccessLogS3BucketName = "service.beta.kubernetes.io/aws-load-balancer-access-log-s3-bucket-name" + +// ServiceAnnotationLoadBalancerAccessLogS3BucketPrefix is the annotation used +// to specify access log s3 bucket prefix. +const ServiceAnnotationLoadBalancerAccessLogS3BucketPrefix = "service.beta.kubernetes.io/aws-load-balancer-access-log-s3-bucket-prefix" + +// ServiceAnnotationLoadBalancerConnectionDrainingEnabled is the annnotation +// used on the service to enable or disable connection draining. +const ServiceAnnotationLoadBalancerConnectionDrainingEnabled = "service.beta.kubernetes.io/aws-load-balancer-connection-draining-enabled" + +// ServiceAnnotationLoadBalancerConnectionDrainingTimeout is the annotation +// used on the service to specify a connection draining timeout. +const ServiceAnnotationLoadBalancerConnectionDrainingTimeout = "service.beta.kubernetes.io/aws-load-balancer-connection-draining-timeout" + +// ServiceAnnotationLoadBalancerConnectionIdleTimeout is the annotation used +// on the service to specify the idle connection timeout. +const ServiceAnnotationLoadBalancerConnectionIdleTimeout = "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout" + +// ServiceAnnotationLoadBalancerCrossZoneLoadBalancingEnabled is the annotation +// used on the service to enable or disable cross-zone load balancing. +const ServiceAnnotationLoadBalancerCrossZoneLoadBalancingEnabled = "service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled" + +// ServiceAnnotationLoadBalancerExtraSecurityGroups is the annotation used +// on the service to specify additional security groups to be added to ELB created +const ServiceAnnotationLoadBalancerExtraSecurityGroups = "service.beta.kubernetes.io/aws-load-balancer-extra-security-groups" + +// ServiceAnnotationLoadBalancerSecurityGroups is the annotation used +// on the service to specify the security groups to be added to ELB created. 
Differently from the annotation +// "service.beta.kubernetes.io/aws-load-balancer-extra-security-groups", this replaces all other security groups previously assigned to the ELB. +const ServiceAnnotationLoadBalancerSecurityGroups = "service.beta.kubernetes.io/aws-load-balancer-security-groups" + +// ServiceAnnotationLoadBalancerCertificate is the annotation used on the +// service to request a secure listener. Value is a valid certificate ARN. +// For more, see http://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/elb-listener-config.html +// CertARN is an IAM or CM certificate ARN, e.g. arn:aws:acm:us-east-1:123456789012:certificate/12345678-1234-1234-1234-123456789012 +const ServiceAnnotationLoadBalancerCertificate = "service.beta.kubernetes.io/aws-load-balancer-ssl-cert" + +// ServiceAnnotationLoadBalancerSSLPorts is the annotation used on the service +// to specify a comma-separated list of ports that will use SSL/HTTPS +// listeners. Defaults to '*' (all). +const ServiceAnnotationLoadBalancerSSLPorts = "service.beta.kubernetes.io/aws-load-balancer-ssl-ports" + +// ServiceAnnotationLoadBalancerSSLNegotiationPolicy is the annotation used on +// the service to specify a SSL negotiation settings for the HTTPS/SSL listeners +// of your load balancer. Defaults to AWS's default +const ServiceAnnotationLoadBalancerSSLNegotiationPolicy = "service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy" + +// ServiceAnnotationLoadBalancerBEProtocol is the annotation used on the service +// to specify the protocol spoken by the backend (pod) behind a listener. +// If `http` (default) or `https`, an HTTPS listener that terminates the +// connection and parses headers is created. +// If set to `ssl` or `tcp`, a "raw" SSL listener is used. +// If set to `http` and `aws-load-balancer-ssl-cert` is not used then +// a HTTP listener is used. +const ServiceAnnotationLoadBalancerBEProtocol = "service.beta.kubernetes.io/aws-load-balancer-backend-protocol" + +// ServiceAnnotationLoadBalancerAdditionalTags is the annotation used on the service +// to specify a comma-separated list of key-value pairs which will be recorded as +// additional tags in the ELB. +// For example: "Key1=Val1,Key2=Val2,KeyNoVal1=,KeyNoVal2" +const ServiceAnnotationLoadBalancerAdditionalTags = "service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags" + +// ServiceAnnotationLoadBalancerHealthCheckProtocol is the annotation used on the service to +// specify the protocol used for the ELB health check. Supported values are TCP, HTTP, HTTPS +// Default is TCP if externalTrafficPolicy is Cluster, HTTP if externalTrafficPolicy is Local +const ServiceAnnotationLoadBalancerHealthCheckProtocol = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-protocol" + +// ServiceAnnotationLoadBalancerHealthCheckPort is the annotation used on the service to +// specify the port used for ELB health check. 
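
All of these keys follow the same contract: they are plain string annotations on the Service object, read straight from its metadata, with absence meaning "use the default". A short sketch of the lookup pattern; the Service literal and the chosen key are illustrative:

    package main

    import (
    	"fmt"

    	v1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // Mirrors ServiceAnnotationLoadBalancerInternal above.
    const annInternal = "service.beta.kubernetes.io/aws-load-balancer-internal"

    func main() {
    	svc := &v1.Service{
    		ObjectMeta: metav1.ObjectMeta{
    			Annotations: map[string]string{annInternal: "true"},
    		},
    	}

    	// Annotations are free-form strings; a missing key reads as "".
    	if v := svc.Annotations[annInternal]; v != "" {
    		fmt.Printf("internal load balancer requested: %q\n", v)
    	}
    }
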
+// Default is traffic-port if externalTrafficPolicy is Cluster, healthCheckNodePort if externalTrafficPolicy is Local +const ServiceAnnotationLoadBalancerHealthCheckPort = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-port" + +// ServiceAnnotationLoadBalancerHealthCheckPath is the annotation used on the service to +// specify the path for the ELB health check when the health check protocol is HTTP/HTTPS +// Defaults to /healthz if externalTrafficPolicy is Local, / otherwise +const ServiceAnnotationLoadBalancerHealthCheckPath = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-path" + +// ServiceAnnotationLoadBalancerHCHealthyThreshold is the annotation used on +// the service to specify the number of successive successful health checks +// required for a backend to be considered healthy for traffic. For NLB, healthy-threshold +// and unhealthy-threshold must be equal. +const ServiceAnnotationLoadBalancerHCHealthyThreshold = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-healthy-threshold" + +// ServiceAnnotationLoadBalancerHCUnhealthyThreshold is the annotation used +// on the service to specify the number of unsuccessful health checks +// required for a backend to be considered unhealthy for traffic +const ServiceAnnotationLoadBalancerHCUnhealthyThreshold = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-unhealthy-threshold" + +// ServiceAnnotationLoadBalancerHCTimeout is the annotation used on the +// service to specify, in seconds, how long to wait before marking a health +// check as failed. +const ServiceAnnotationLoadBalancerHCTimeout = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout" + +// ServiceAnnotationLoadBalancerHCInterval is the annotation used on the +// service to specify, in seconds, the interval between health checks. +const ServiceAnnotationLoadBalancerHCInterval = "service.beta.kubernetes.io/aws-load-balancer-healthcheck-interval" + +// ServiceAnnotationLoadBalancerEIPAllocations is the annotation used on the +// service to specify a comma separated list of EIP allocations to use as +// static IP addresses for the NLB. Only supported on elbv2 (NLB) +const ServiceAnnotationLoadBalancerEIPAllocations = "service.beta.kubernetes.io/aws-load-balancer-eip-allocations" + +// ServiceAnnotationLoadBalancerTargetNodeLabels is the annotation used on the service +// to specify a comma-separated list of key-value pairs which will be used to select +// the target nodes for the load balancer +// For example: "Key1=Val1,Key2=Val2,KeyNoVal1=,KeyNoVal2" +const ServiceAnnotationLoadBalancerTargetNodeLabels = "service.beta.kubernetes.io/aws-load-balancer-target-node-labels" + +// ServiceAnnotationLoadBalancerSubnets is the annotation used on the service to specify the +// Availability Zone configuration for the load balancer. The values are comma separated list of +// subnetID or subnetName from different AZs +// By default, the controller will auto-discover the subnets. If there are multiple subnets per AZ, auto-discovery +// will break the tie in the following order - +// 1. prefer the subnet with the correct role tag. kubernetes.io/role/elb for public and kubernetes.io/role/internal-elb for private access +// 2. prefer the subnet with the cluster tag kubernetes.io/cluster/ +// 3. 
prefer the subnet that is first in lexicographic order +const ServiceAnnotationLoadBalancerSubnets = "service.beta.kubernetes.io/aws-load-balancer-subnets" + +// Event key when a volume is stuck on attaching state when being attached to a volume +const volumeAttachmentStuck = "VolumeAttachmentStuck" + +// Indicates that a node has volumes stuck in attaching state and hence it is not fit for scheduling more pods +const nodeWithImpairedVolumes = "NodeWithImpairedVolumes" + +const ( + // volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach + volumeAttachmentStatusConsecutiveErrorLimit = 10 + + // Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just waste of API quota. + volumeAttachmentStatusInitialDelay = 2 * time.Second + // Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just waste of API quota. + volumeDetachmentStatusInitialDelay = 5 * time.Second + // After the initial delay, poll attach/detach with exponential backoff (2046 seconds total) + volumeAttachmentStatusPollDelay = 2 * time.Second + volumeAttachmentStatusFactor = 2 + volumeAttachmentStatusSteps = 11 + + // createTag* is configuration of exponential backoff for CreateTag call. We + // retry mainly because if we create an object, we cannot tag it until it is + // "fully created" (eventual consistency). Starting with 1 second, doubling + // it every step and taking 9 steps results in 255 second total waiting + // time. + createTagInitialDelay = 1 * time.Second + createTagFactor = 2.0 + createTagSteps = 9 + + // volumeCreate* is configuration of exponential backoff for created volume. + // On a random AWS account (shared among several developers) it took 4s on + // average, 8s max. + volumeCreateInitialDelay = 5 * time.Second + volumeCreateBackoffFactor = 1.2 + volumeCreateBackoffSteps = 10 + + // Number of node names that can be added to a filter. The AWS limit is 200 + // but we are using a lower limit on purpose + filterNodeLimit = 150 + + // fargateNodeNamePrefix string is added to awsInstance nodeName and providerID of Fargate nodes. + fargateNodeNamePrefix = "fargate-" + + // privateDNSNamePrefix is the prefix added to ENI Private DNS Name. + privateDNSNamePrefix = "ip-" + + // rbnNamePrefix is the prefix added to ENI Private DNS Name with RBN. + rbnNamePrefix = "i-" +) + +const ( + // represents expected attachment status of a volume after attach + volumeAttachedStatus = "attached" + + // represents expected attachment status of a volume after detach + volumeDetachedStatus = "detached" +) + +// awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master +// The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation. +var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master") + +// Maps from backend protocol to ELB protocol +var backendProtocolMapping = map[string]string{ + "https": "https", + "http": "https", + "ssl": "ssl", + "tcp": "ssl", +} + +// MaxReadThenCreateRetries sets the maximum number of attempts we will make when +// we read to see if something exists and then try to create it if we didn't find it. 
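
The createTag* and volumeCreate* constants above are sized for the exponential backoff helper in k8s.io/apimachinery. A hedged sketch of how such constants drive wait.ExponentialBackoff, with doCreateTags as a hypothetical stand-in for the retried, eventually-consistent EC2 call:

    package main

    import (
    	"fmt"
    	"time"

    	"k8s.io/apimachinery/pkg/util/wait"
    )

    // doCreateTags is a placeholder for the eventually-consistent AWS call.
    func doCreateTags() error { return nil }

    func main() {
    	backoff := wait.Backoff{
    		Duration: 1 * time.Second, // createTagInitialDelay
    		Factor:   2.0,             // createTagFactor
    		Steps:    9,               // createTagSteps
    	}

    	// The condition is retried with the delay doubling each step until it
    	// returns true, returns an error, or the steps run out.
    	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
    		if doCreateTags() != nil {
    			return false, nil // object not visible yet; retry
    		}
    		return true, nil
    	})
    	fmt.Println("tagged:", err == nil)
    }
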
+// This can fail once in a consistent system if done in parallel +// In an eventually consistent system, it could fail unboundedly +const MaxReadThenCreateRetries = 30 + +// DefaultVolumeType specifies which storage to use for newly created Volumes +// TODO: Remove when user/admin can configure volume types and thus we don't +// need hardcoded defaults. +const DefaultVolumeType = "gp2" + +// Services is an abstraction over AWS, to allow mocking/other implementations +type Services interface { + Compute(region string) (EC2, error) + LoadBalancing(region string) (ELB, error) + LoadBalancingV2(region string) (ELBV2, error) + Autoscaling(region string) (ASG, error) + Metadata() (EC2Metadata, error) + KeyManagement(region string) (KMS, error) +} + +// EC2 is an abstraction over AWS', to allow mocking/other implementations +// Note that the DescribeX functions return a list, so callers don't need to deal with paging +// TODO: Should we rename this to AWS (EBS & ELB are not technically part of EC2) +type EC2 interface { + // Query EC2 for instances matching the filter + DescribeInstances(request *ec2.DescribeInstancesInput) ([]*ec2.Instance, error) + + // Attach a volume to an instance + AttachVolume(*ec2.AttachVolumeInput) (*ec2.VolumeAttachment, error) + // Detach a volume from an instance it is attached to + DetachVolume(request *ec2.DetachVolumeInput) (resp *ec2.VolumeAttachment, err error) + // Lists volumes + DescribeVolumes(request *ec2.DescribeVolumesInput) ([]*ec2.Volume, error) + // Create an EBS volume + CreateVolume(request *ec2.CreateVolumeInput) (resp *ec2.Volume, err error) + // Delete an EBS volume + DeleteVolume(*ec2.DeleteVolumeInput) (*ec2.DeleteVolumeOutput, error) + + ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) + + DescribeVolumeModifications(*ec2.DescribeVolumesModificationsInput) ([]*ec2.VolumeModification, error) + + DescribeSecurityGroups(request *ec2.DescribeSecurityGroupsInput) ([]*ec2.SecurityGroup, error) + + CreateSecurityGroup(*ec2.CreateSecurityGroupInput) (*ec2.CreateSecurityGroupOutput, error) + DeleteSecurityGroup(request *ec2.DeleteSecurityGroupInput) (*ec2.DeleteSecurityGroupOutput, error) + + AuthorizeSecurityGroupIngress(*ec2.AuthorizeSecurityGroupIngressInput) (*ec2.AuthorizeSecurityGroupIngressOutput, error) + RevokeSecurityGroupIngress(*ec2.RevokeSecurityGroupIngressInput) (*ec2.RevokeSecurityGroupIngressOutput, error) + + DescribeSubnets(*ec2.DescribeSubnetsInput) ([]*ec2.Subnet, error) + + CreateTags(*ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) + DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) + + DescribeRouteTables(request *ec2.DescribeRouteTablesInput) ([]*ec2.RouteTable, error) + CreateRoute(request *ec2.CreateRouteInput) (*ec2.CreateRouteOutput, error) + DeleteRoute(request *ec2.DeleteRouteInput) (*ec2.DeleteRouteOutput, error) + + ModifyInstanceAttribute(request *ec2.ModifyInstanceAttributeInput) (*ec2.ModifyInstanceAttributeOutput, error) + + DescribeVpcs(input *ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error) + + DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) +} + +// ELB is a simple pass-through of AWS' ELB client interface, which allows for testing +type ELB interface { + CreateLoadBalancer(*elb.CreateLoadBalancerInput) (*elb.CreateLoadBalancerOutput, error) + DeleteLoadBalancer(*elb.DeleteLoadBalancerInput) (*elb.DeleteLoadBalancerOutput, error) + DescribeLoadBalancers(*elb.DescribeLoadBalancersInput) 
(*elb.DescribeLoadBalancersOutput, error) + AddTags(*elb.AddTagsInput) (*elb.AddTagsOutput, error) + RegisterInstancesWithLoadBalancer(*elb.RegisterInstancesWithLoadBalancerInput) (*elb.RegisterInstancesWithLoadBalancerOutput, error) + DeregisterInstancesFromLoadBalancer(*elb.DeregisterInstancesFromLoadBalancerInput) (*elb.DeregisterInstancesFromLoadBalancerOutput, error) + CreateLoadBalancerPolicy(*elb.CreateLoadBalancerPolicyInput) (*elb.CreateLoadBalancerPolicyOutput, error) + SetLoadBalancerPoliciesForBackendServer(*elb.SetLoadBalancerPoliciesForBackendServerInput) (*elb.SetLoadBalancerPoliciesForBackendServerOutput, error) + SetLoadBalancerPoliciesOfListener(input *elb.SetLoadBalancerPoliciesOfListenerInput) (*elb.SetLoadBalancerPoliciesOfListenerOutput, error) + DescribeLoadBalancerPolicies(input *elb.DescribeLoadBalancerPoliciesInput) (*elb.DescribeLoadBalancerPoliciesOutput, error) + + DetachLoadBalancerFromSubnets(*elb.DetachLoadBalancerFromSubnetsInput) (*elb.DetachLoadBalancerFromSubnetsOutput, error) + AttachLoadBalancerToSubnets(*elb.AttachLoadBalancerToSubnetsInput) (*elb.AttachLoadBalancerToSubnetsOutput, error) + + CreateLoadBalancerListeners(*elb.CreateLoadBalancerListenersInput) (*elb.CreateLoadBalancerListenersOutput, error) + DeleteLoadBalancerListeners(*elb.DeleteLoadBalancerListenersInput) (*elb.DeleteLoadBalancerListenersOutput, error) + + ApplySecurityGroupsToLoadBalancer(*elb.ApplySecurityGroupsToLoadBalancerInput) (*elb.ApplySecurityGroupsToLoadBalancerOutput, error) + + ConfigureHealthCheck(*elb.ConfigureHealthCheckInput) (*elb.ConfigureHealthCheckOutput, error) + + DescribeLoadBalancerAttributes(*elb.DescribeLoadBalancerAttributesInput) (*elb.DescribeLoadBalancerAttributesOutput, error) + ModifyLoadBalancerAttributes(*elb.ModifyLoadBalancerAttributesInput) (*elb.ModifyLoadBalancerAttributesOutput, error) +} + +// ELBV2 is a simple pass-through of AWS' ELBV2 client interface, which allows for testing +type ELBV2 interface { + AddTags(input *elbv2.AddTagsInput) (*elbv2.AddTagsOutput, error) + + CreateLoadBalancer(*elbv2.CreateLoadBalancerInput) (*elbv2.CreateLoadBalancerOutput, error) + DescribeLoadBalancers(*elbv2.DescribeLoadBalancersInput) (*elbv2.DescribeLoadBalancersOutput, error) + DeleteLoadBalancer(*elbv2.DeleteLoadBalancerInput) (*elbv2.DeleteLoadBalancerOutput, error) + + ModifyLoadBalancerAttributes(*elbv2.ModifyLoadBalancerAttributesInput) (*elbv2.ModifyLoadBalancerAttributesOutput, error) + DescribeLoadBalancerAttributes(*elbv2.DescribeLoadBalancerAttributesInput) (*elbv2.DescribeLoadBalancerAttributesOutput, error) + + CreateTargetGroup(*elbv2.CreateTargetGroupInput) (*elbv2.CreateTargetGroupOutput, error) + DescribeTargetGroups(*elbv2.DescribeTargetGroupsInput) (*elbv2.DescribeTargetGroupsOutput, error) + ModifyTargetGroup(*elbv2.ModifyTargetGroupInput) (*elbv2.ModifyTargetGroupOutput, error) + DeleteTargetGroup(*elbv2.DeleteTargetGroupInput) (*elbv2.DeleteTargetGroupOutput, error) + + DescribeTargetHealth(input *elbv2.DescribeTargetHealthInput) (*elbv2.DescribeTargetHealthOutput, error) + + DescribeTargetGroupAttributes(*elbv2.DescribeTargetGroupAttributesInput) (*elbv2.DescribeTargetGroupAttributesOutput, error) + ModifyTargetGroupAttributes(*elbv2.ModifyTargetGroupAttributesInput) (*elbv2.ModifyTargetGroupAttributesOutput, error) + + RegisterTargets(*elbv2.RegisterTargetsInput) (*elbv2.RegisterTargetsOutput, error) + DeregisterTargets(*elbv2.DeregisterTargetsInput) (*elbv2.DeregisterTargetsOutput, error) + + 
CreateListener(*elbv2.CreateListenerInput) (*elbv2.CreateListenerOutput, error) + DescribeListeners(*elbv2.DescribeListenersInput) (*elbv2.DescribeListenersOutput, error) + DeleteListener(*elbv2.DeleteListenerInput) (*elbv2.DeleteListenerOutput, error) + ModifyListener(*elbv2.ModifyListenerInput) (*elbv2.ModifyListenerOutput, error) + + WaitUntilLoadBalancersDeleted(*elbv2.DescribeLoadBalancersInput) error +} + +// ASG is a simple pass-through of the Autoscaling client interface, which +// allows for testing. +type ASG interface { + UpdateAutoScalingGroup(*autoscaling.UpdateAutoScalingGroupInput) (*autoscaling.UpdateAutoScalingGroupOutput, error) + DescribeAutoScalingGroups(*autoscaling.DescribeAutoScalingGroupsInput) (*autoscaling.DescribeAutoScalingGroupsOutput, error) +} + +// KMS is a simple pass-through of the Key Management Service client interface, +// which allows for testing. +type KMS interface { + DescribeKey(*kms.DescribeKeyInput) (*kms.DescribeKeyOutput, error) +} + +// EC2Metadata is an abstraction over the AWS metadata service. +type EC2Metadata interface { + // Query the EC2 metadata service (used to discover instance-id etc) + GetMetadata(path string) (string, error) +} + +// AWS volume types +const ( + // Provisioned IOPS SSD + VolumeTypeIO1 = "io1" + // General Purpose SSD + VolumeTypeGP2 = "gp2" + // Cold HDD (sc1) + VolumeTypeSC1 = "sc1" + // Throughput Optimized HDD + VolumeTypeST1 = "st1" +) + +// AWS provisioning limits. +// Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html +const ( + MinTotalIOPS = 100 + MaxTotalIOPS = 64000 +) + +// VolumeOptions specifies capacity and tags for a volume. +type VolumeOptions struct { + CapacityGB int + Tags map[string]string + VolumeType string + AvailabilityZone string + // IOPSPerGB x CapacityGB will give total IOPS of the volume to create. + // Calculated total IOPS will be capped at MaxTotalIOPS. + IOPSPerGB int + Encrypted bool + // fully qualified resource name to the key to use for encryption. + // example: arn:aws:kms:us-east-1:012345678910:key/abcd1234-a123-456a-a12b-a123b4cd56ef + KmsKeyID string +} + +// Volumes is an interface for managing cloud-provisioned volumes +// TODO: Allow other clouds to implement this +type Volumes interface { + // Attach the disk to the node with the specified NodeName + // nodeName can be empty to mean "the instance on which we are running" + // Returns the device (e.g. 
/dev/xvdf) where we attached the volume + AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) (string, error) + // Detach the disk from the node with the specified NodeName + // nodeName can be empty to mean "the instance on which we are running" + // Returns the device where the volume was attached + DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) (string, error) + + // Create a volume with the specified options + CreateDisk(volumeOptions *VolumeOptions) (volumeName KubernetesVolumeID, err error) + // Delete the specified volume + // Returns true iff the volume was deleted + // If the was not found, returns (false, nil) + DeleteDisk(volumeName KubernetesVolumeID) (bool, error) + + // Get labels to apply to volume on creation + GetVolumeLabels(volumeName KubernetesVolumeID) (map[string]string, error) + + // Get volume's disk path from volume name + // return the device path where the volume is attached + GetDiskPath(volumeName KubernetesVolumeID) (string, error) + + // Check if the volume is already attached to the node with the specified NodeName + DiskIsAttached(diskName KubernetesVolumeID, nodeName types.NodeName) (bool, error) + + // Check if disks specified in argument map are still attached to their respective nodes. + DisksAreAttached(map[types.NodeName][]KubernetesVolumeID) (map[types.NodeName]map[KubernetesVolumeID]bool, error) + + // Expand the disk to new size + ResizeDisk(diskName KubernetesVolumeID, oldSize resource.Quantity, newSize resource.Quantity) (resource.Quantity, error) +} + +// InstanceGroups is an interface for managing cloud-managed instance groups / autoscaling instance groups +// TODO: Allow other clouds to implement this +type InstanceGroups interface { + // Set the size to the fixed size + ResizeInstanceGroup(instanceGroupName string, size int) error + // Queries the cloud provider for information about the specified instance group + DescribeInstanceGroup(instanceGroupName string) (InstanceGroupInfo, error) +} + +// InstanceGroupInfo is returned by InstanceGroups.Describe, and exposes information about the group. +type InstanceGroupInfo interface { + // The number of instances currently running under control of this group + CurrentSize() (int, error) +} + +var _ cloudprovider.Interface = (*Cloud)(nil) +var _ cloudprovider.Instances = (*Cloud)(nil) +var _ cloudprovider.LoadBalancer = (*Cloud)(nil) +var _ cloudprovider.Routes = (*Cloud)(nil) +var _ cloudprovider.Zones = (*Cloud)(nil) +var _ cloudprovider.PVLabeler = (*Cloud)(nil) + +// Cloud is an implementation of Interface, LoadBalancer and Instances for Amazon Web Services. 
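
The var _ lines above are compile-time interface checks: assigning a typed nil pointer to a blank interface-typed variable makes the build fail if the type ever stops satisfying the interface, at no runtime cost. A self-contained illustration of the idiom, with made-up names:

    package main

    import "fmt"

    type Namer interface {
    	Name() string
    }

    type Cloudish struct{}

    func (c *Cloudish) Name() string { return "aws-ish" }

    // Compile-time check in the same style as the assertions above; deleting
    // the Name method turns this line into a build error.
    var _ Namer = (*Cloudish)(nil)

    func main() {
    	fmt.Println((&Cloudish{}).Name())
    }
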
+type Cloud struct { + ec2 EC2 + elb ELB + elbv2 ELBV2 + asg ASG + kms KMS + metadata EC2Metadata + cfg *CloudConfig + region string + vpcID string + + tagging awsTagging + + // The AWS instance that we are running on + // Note that we cache some state in awsInstance (mountpoints), so we must preserve the instance + selfAWSInstance *awsInstance + + instanceCache instanceCache + + clientBuilder cloudprovider.ControllerClientBuilder + kubeClient clientset.Interface + + nodeInformer informercorev1.NodeInformer + // Extract the function out to make it easier to test + nodeInformerHasSynced cache.InformerSynced + + eventBroadcaster record.EventBroadcaster + eventRecorder record.EventRecorder + + // We keep an active list of devices we have assigned but not yet + // attached, to avoid a race condition where we assign a device mapping + // and then get a second request before we attach the volume + attachingMutex sync.Mutex + attaching map[types.NodeName]map[mountDevice]EBSVolumeID + + // state of our device allocator for each node + deviceAllocators map[types.NodeName]DeviceAllocator +} + +var _ Volumes = &Cloud{} + +// CloudConfig wraps the settings for the AWS cloud provider. +// NOTE: Cloud config files should follow the same Kubernetes deprecation policy as +// flags or CLIs. Config fields should not change behavior in incompatible ways and +// should be deprecated for at least 2 release prior to removing. +// See https://kubernetes.io/docs/reference/using-api/deprecation-policy/#deprecating-a-flag-or-cli +// for more details. +type CloudConfig struct { + Global struct { + // TODO: Is there any use for this? We can get it from the instance metadata service + // Maybe if we're not running on AWS, e.g. bootstrap; for now it is not very useful + Zone string + + // The AWS VPC flag enables the possibility to run the master components + // on a different aws account, on a different cloud provider or on-premises. + // If the flag is set also the KubernetesClusterTag must be provided + VPC string + // SubnetID enables using a specific subnet to use for ELB's + SubnetID string + // RouteTableID enables using a specific RouteTable + RouteTableID string + + // RoleARN is the IAM role to assume when interaction with AWS APIs. + RoleARN string + + // KubernetesClusterTag is the legacy cluster id we'll use to identify our cluster resources + KubernetesClusterTag string + // KubernetesClusterID is the cluster id we'll use to identify our cluster resources + KubernetesClusterID string + + //The aws provider creates an inbound rule per load balancer on the node security + //group. However, this can run into the AWS security group rule limit of 50 if + //many LoadBalancers are created. + // + //This flag disables the automatic ingress creation. It requires that the user + //has setup a rule that allows inbound traffic on kubelet ports from the + //local VPC subnet (so load balancers can access it). E.g. 10.82.0.0/16 30000-32000. + DisableSecurityGroupIngress bool + + //AWS has a hard limit of 500 security groups. For large clusters creating a security group for each ELB + //can cause the max number of security groups to be reached. If this is set instead of creating a new + //Security group for each ELB this security group will be used instead. + ElbSecurityGroup string + + //During the instantiation of an new AWS cloud provider, the detected region + //is validated against a known set of regions. + // + //In a non-standard, AWS like environment (e.g. Eucalyptus), this check may + //be undesirable. 
Setting this to true will disable the check and provide + //a warning that the check was skipped. Please note that this is an + //experimental feature and work-in-progress for the moment. If you find + //yourself in an non-AWS cloud and open an issue, please indicate that in the + //issue body. + DisableStrictZoneCheck bool + + // NodeIPFamilies determines which IP addresses are added to node objects and their ordering. + NodeIPFamilies []string + } + // [ServiceOverride "1"] + // Service = s3 + // Region = region1 + // URL = https://s3.foo.bar + // SigningRegion = signing_region + // SigningMethod = signing_method + // + // [ServiceOverride "2"] + // Service = ec2 + // Region = region2 + // URL = https://ec2.foo.bar + // SigningRegion = signing_region + // SigningMethod = signing_method + ServiceOverride map[string]*struct { + Service string + Region string + URL string + SigningRegion string + SigningMethod string + SigningName string + } +} + +func (cfg *CloudConfig) validateOverrides() error { + if len(cfg.ServiceOverride) == 0 { + return nil + } + set := make(map[string]bool) + for onum, ovrd := range cfg.ServiceOverride { + // Note: gcfg does not space trim, so we have to when comparing to empty string "" + name := strings.TrimSpace(ovrd.Service) + if name == "" { + return fmt.Errorf("service name is missing [Service is \"\"] in override %s", onum) + } + // insure the map service name is space trimmed + ovrd.Service = name + + region := strings.TrimSpace(ovrd.Region) + if region == "" { + return fmt.Errorf("service region is missing [Region is \"\"] in override %s", onum) + } + // insure the map region is space trimmed + ovrd.Region = region + + url := strings.TrimSpace(ovrd.URL) + if url == "" { + return fmt.Errorf("url is missing [URL is \"\"] in override %s", onum) + } + signingRegion := strings.TrimSpace(ovrd.SigningRegion) + if signingRegion == "" { + return fmt.Errorf("signingRegion is missing [SigningRegion is \"\"] in override %s", onum) + } + signature := name + "_" + region + if set[signature] { + return fmt.Errorf("duplicate entry found for service override [%s] (%s in %s)", onum, name, region) + } + set[signature] = true + } + return nil +} + +func (cfg *CloudConfig) getResolver() endpoints.ResolverFunc { + defaultResolver := endpoints.DefaultResolver() + defaultResolverFn := func(service, region string, + optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) { + return defaultResolver.EndpointFor(service, region, optFns...) + } + if len(cfg.ServiceOverride) == 0 { + return defaultResolverFn + } + + return func(service, region string, + optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) { + for _, override := range cfg.ServiceOverride { + if override.Service == service && override.Region == region { + return endpoints.ResolvedEndpoint{ + URL: override.URL, + SigningRegion: override.SigningRegion, + SigningMethod: override.SigningMethod, + SigningName: override.SigningName, + }, nil + } + } + return defaultResolver.EndpointFor(service, region, optFns...) 
+ } +} + +// awsSdkEC2 is an implementation of the EC2 interface, backed by aws-sdk-go +type awsSdkEC2 struct { + ec2 ec2iface.EC2API +} + +// Interface to make the CloudConfig immutable for awsSDKProvider +type awsCloudConfigProvider interface { + getResolver() endpoints.ResolverFunc +} + +type awsSDKProvider struct { + creds *credentials.Credentials + cfg awsCloudConfigProvider + + mutex sync.Mutex + regionDelayers map[string]*CrossRequestRetryDelay +} + +func newAWSSDKProvider(creds *credentials.Credentials, cfg *CloudConfig) *awsSDKProvider { + return &awsSDKProvider{ + creds: creds, + cfg: cfg, + regionDelayers: make(map[string]*CrossRequestRetryDelay), + } +} + +func (p *awsSDKProvider) addHandlers(regionName string, h *request.Handlers) { + h.Build.PushFrontNamed(request.NamedHandler{ + Name: "k8s/user-agent", + Fn: request.MakeAddToUserAgentHandler("kubernetes", version.Get().String()), + }) + + h.Sign.PushFrontNamed(request.NamedHandler{ + Name: "k8s/logger", + Fn: awsHandlerLogger, + }) + + delayer := p.getCrossRequestRetryDelay(regionName) + if delayer != nil { + h.Sign.PushFrontNamed(request.NamedHandler{ + Name: "k8s/delay-presign", + Fn: delayer.BeforeSign, + }) + + h.AfterRetry.PushFrontNamed(request.NamedHandler{ + Name: "k8s/delay-afterretry", + Fn: delayer.AfterRetry, + }) + } + + p.addAPILoggingHandlers(h) +} + +func (p *awsSDKProvider) addAPILoggingHandlers(h *request.Handlers) { + h.Send.PushBackNamed(request.NamedHandler{ + Name: "k8s/api-request", + Fn: awsSendHandlerLogger, + }) + + h.ValidateResponse.PushFrontNamed(request.NamedHandler{ + Name: "k8s/api-validate-response", + Fn: awsValidateResponseHandlerLogger, + }) +} + +// Get a CrossRequestRetryDelay, scoped to the region, not to the request. +// This means that when we hit a limit on a call, we will delay _all_ calls to the API. +// We do this to protect the AWS account from becoming overloaded and effectively locked. +// We also log when we hit request limits. +// Note that this delays the current goroutine; this is bad behaviour and will +// likely cause k8s to become slow or unresponsive for cloud operations. +// However, this throttle is intended only as a last resort. When we observe +// this throttling, we need to address the root cause (e.g. add a delay to a +// controller retry loop) +func (p *awsSDKProvider) getCrossRequestRetryDelay(regionName string) *CrossRequestRetryDelay { + p.mutex.Lock() + defer p.mutex.Unlock() + + delayer, found := p.regionDelayers[regionName] + if !found { + delayer = NewCrossRequestRetryDelay() + p.regionDelayers[regionName] = delayer + } + return delayer +} + +// SetInformers implements InformerUser interface by setting up informer-fed caches for aws lib to +// leverage Kubernetes API for caching +func (c *Cloud) SetInformers(informerFactory informers.SharedInformerFactory) { + klog.Infof("Setting up informers for Cloud") + c.nodeInformer = informerFactory.Core().V1().Nodes() + c.nodeInformerHasSynced = c.nodeInformer.Informer().HasSynced +} + +func (p *awsSDKProvider) Compute(regionName string) (EC2, error) { + awsConfig := &aws.Config{ + Region: ®ionName, + Credentials: p.creds, + } + awsConfig = awsConfig.WithCredentialsChainVerboseErrors(true). 
+ WithEndpointResolver(p.cfg.getResolver()) + sess, err := session.NewSessionWithOptions(session.Options{ + Config: *awsConfig, + SharedConfigState: session.SharedConfigEnable, + }) + + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + service := ec2.New(sess) + + p.addHandlers(regionName, &service.Handlers) + + ec2 := &awsSdkEC2{ + ec2: service, + } + return ec2, nil +} + +func (p *awsSDKProvider) LoadBalancing(regionName string) (ELB, error) { + awsConfig := &aws.Config{ + Region: ®ionName, + Credentials: p.creds, + } + awsConfig = awsConfig.WithCredentialsChainVerboseErrors(true). + WithEndpointResolver(p.cfg.getResolver()) + sess, err := session.NewSessionWithOptions(session.Options{ + Config: *awsConfig, + SharedConfigState: session.SharedConfigEnable, + }) + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + elbClient := elb.New(sess) + p.addHandlers(regionName, &elbClient.Handlers) + + return elbClient, nil +} + +func (p *awsSDKProvider) LoadBalancingV2(regionName string) (ELBV2, error) { + awsConfig := &aws.Config{ + Region: ®ionName, + Credentials: p.creds, + } + awsConfig = awsConfig.WithCredentialsChainVerboseErrors(true). + WithEndpointResolver(p.cfg.getResolver()) + sess, err := session.NewSessionWithOptions(session.Options{ + Config: *awsConfig, + SharedConfigState: session.SharedConfigEnable, + }) + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + elbClient := elbv2.New(sess) + + p.addHandlers(regionName, &elbClient.Handlers) + + return elbClient, nil +} + +func (p *awsSDKProvider) Autoscaling(regionName string) (ASG, error) { + awsConfig := &aws.Config{ + Region: ®ionName, + Credentials: p.creds, + } + awsConfig = awsConfig.WithCredentialsChainVerboseErrors(true). + WithEndpointResolver(p.cfg.getResolver()) + sess, err := session.NewSessionWithOptions(session.Options{ + Config: *awsConfig, + SharedConfigState: session.SharedConfigEnable, + }) + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + client := autoscaling.New(sess) + + p.addHandlers(regionName, &client.Handlers) + + return client, nil +} + +func (p *awsSDKProvider) Metadata() (EC2Metadata, error) { + sess, err := session.NewSession(&aws.Config{ + EndpointResolver: p.cfg.getResolver(), + }) + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + client := ec2metadata.New(sess) + p.addAPILoggingHandlers(&client.Handlers) + return client, nil +} + +func (p *awsSDKProvider) KeyManagement(regionName string) (KMS, error) { + awsConfig := &aws.Config{ + Region: ®ionName, + Credentials: p.creds, + } + awsConfig = awsConfig.WithCredentialsChainVerboseErrors(true). + WithEndpointResolver(p.cfg.getResolver()) + sess, err := session.NewSessionWithOptions(session.Options{ + Config: *awsConfig, + SharedConfigState: session.SharedConfigEnable, + }) + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + kmsClient := kms.New(sess) + + p.addHandlers(regionName, &kmsClient.Handlers) + + return kmsClient, nil +} + +func newEc2Filter(name string, values ...string) *ec2.Filter { + filter := &ec2.Filter{ + Name: aws.String(name), + } + for _, value := range values { + filter.Values = append(filter.Values, aws.String(value)) + } + return filter +} + +// AddSSHKeyToAllInstances is currently not implemented. 
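
newEc2Filter above is the small helper the provider uses to scope EC2 queries. A standalone sketch of typical use; the helper body mirrors the one above, while the filter names and VPC ID are illustrative:

    package main

    import (
    	"fmt"

    	"github.com/aws/aws-sdk-go/aws"
    	"github.com/aws/aws-sdk-go/service/ec2"
    )

    // Mirrors the newEc2Filter helper defined above.
    func newEc2Filter(name string, values ...string) *ec2.Filter {
    	filter := &ec2.Filter{Name: aws.String(name)}
    	for _, value := range values {
    		filter.Values = append(filter.Values, aws.String(value))
    	}
    	return filter
    }

    func main() {
    	// Restrict a DescribeInstances call to running instances in one VPC.
    	input := &ec2.DescribeInstancesInput{
    		Filters: []*ec2.Filter{
    			newEc2Filter("instance-state-name", "running"),
    			newEc2Filter("vpc-id", "vpc-0123456789abcdef0"),
    		},
    	}
    	fmt.Println(input)
    }
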
+func (c *Cloud) AddSSHKeyToAllInstances(ctx context.Context, user string, keyData []byte) error {
+	return cloudprovider.NotImplemented
+}
+
+// CurrentNodeName returns the name of the current node
+func (c *Cloud) CurrentNodeName(ctx context.Context, hostname string) (types.NodeName, error) {
+	return c.selfAWSInstance.nodeName, nil
+}
+
+// DescribeInstances is the implementation of EC2.DescribeInstances
+func (s *awsSdkEC2) DescribeInstances(request *ec2.DescribeInstancesInput) ([]*ec2.Instance, error) {
+	// Instances are paged
+	results := []*ec2.Instance{}
+	var nextToken *string
+	requestTime := time.Now()
+
+	if request.MaxResults == nil && len(request.InstanceIds) == 0 {
+		// MaxResults must be set in order for pagination to work
+		// MaxResults cannot be set with InstanceIds
+		request.MaxResults = aws.Int64(1000)
+	}
+
+	for {
+		response, err := s.ec2.DescribeInstances(request)
+		if err != nil {
+			recordAWSMetric("describe_instance", 0, err)
+			return nil, fmt.Errorf("error listing AWS instances: %q", err)
+		}
+
+		for _, reservation := range response.Reservations {
+			results = append(results, reservation.Instances...)
+		}
+
+		nextToken = response.NextToken
+		if aws.StringValue(nextToken) == "" {
+			break
+		}
+		request.NextToken = nextToken
+	}
+	timeTaken := time.Since(requestTime).Seconds()
+	recordAWSMetric("describe_instance", timeTaken, nil)
+	return results, nil
+}
+
+// DescribeNetworkInterfaces describes the network interfaces provided in the input.
+func (s *awsSdkEC2) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) {
+	requestTime := time.Now()
+	resp, err := s.ec2.DescribeNetworkInterfaces(input)
+	timeTaken := time.Since(requestTime).Seconds()
+	recordAWSMetric("describe_network_interfaces", timeTaken, err)
+	return resp, err
+}
+
+// Implements EC2.DescribeSecurityGroups
+func (s *awsSdkEC2) DescribeSecurityGroups(request *ec2.DescribeSecurityGroupsInput) ([]*ec2.SecurityGroup, error) {
+	// Security groups are paged
+	results := []*ec2.SecurityGroup{}
+	var nextToken *string
+	requestTime := time.Now()
+	for {
+		response, err := s.ec2.DescribeSecurityGroups(request)
+		if err != nil {
+			recordAWSMetric("describe_security_groups", 0, err)
+			return nil, fmt.Errorf("error listing AWS security groups: %q", err)
+		}
+
+		results = append(results, response.SecurityGroups...)
+ + nextToken = response.NextToken + if aws.StringValue(nextToken) == "" { + break + } + request.NextToken = nextToken + } + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("describe_security_groups", timeTaken, nil) + return results, nil +} + +func (s *awsSdkEC2) AttachVolume(request *ec2.AttachVolumeInput) (*ec2.VolumeAttachment, error) { + requestTime := time.Now() + resp, err := s.ec2.AttachVolume(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("attach_volume", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) DetachVolume(request *ec2.DetachVolumeInput) (*ec2.VolumeAttachment, error) { + requestTime := time.Now() + resp, err := s.ec2.DetachVolume(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("detach_volume", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) DescribeVolumes(request *ec2.DescribeVolumesInput) ([]*ec2.Volume, error) { + // Volumes are paged + results := []*ec2.Volume{} + var nextToken *string + requestTime := time.Now() + for { + response, err := s.ec2.DescribeVolumes(request) + + if err != nil { + recordAWSMetric("describe_volume", 0, err) + return nil, err + } + + results = append(results, response.Volumes...) + + nextToken = response.NextToken + if aws.StringValue(nextToken) == "" { + break + } + request.NextToken = nextToken + } + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("describe_volume", timeTaken, nil) + return results, nil +} + +func (s *awsSdkEC2) CreateVolume(request *ec2.CreateVolumeInput) (*ec2.Volume, error) { + requestTime := time.Now() + resp, err := s.ec2.CreateVolume(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("create_volume", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) DeleteVolume(request *ec2.DeleteVolumeInput) (*ec2.DeleteVolumeOutput, error) { + requestTime := time.Now() + resp, err := s.ec2.DeleteVolume(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("delete_volume", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) ModifyVolume(request *ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) { + requestTime := time.Now() + resp, err := s.ec2.ModifyVolume(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("modify_volume", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) DescribeVolumeModifications(request *ec2.DescribeVolumesModificationsInput) ([]*ec2.VolumeModification, error) { + requestTime := time.Now() + results := []*ec2.VolumeModification{} + var nextToken *string + for { + resp, err := s.ec2.DescribeVolumesModifications(request) + if err != nil { + recordAWSMetric("describe_volume_modification", 0, err) + return nil, fmt.Errorf("error listing volume modifictions : %v", err) + } + results = append(results, resp.VolumesModifications...) 
+ nextToken = resp.NextToken + if aws.StringValue(nextToken) == "" { + break + } + request.NextToken = nextToken + } + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("describe_volume_modification", timeTaken, nil) + return results, nil +} + +func (s *awsSdkEC2) DescribeSubnets(request *ec2.DescribeSubnetsInput) ([]*ec2.Subnet, error) { + // Subnets are not paged + response, err := s.ec2.DescribeSubnets(request) + if err != nil { + return nil, fmt.Errorf("error listing AWS subnets: %q", err) + } + return response.Subnets, nil +} + +func (s *awsSdkEC2) CreateSecurityGroup(request *ec2.CreateSecurityGroupInput) (*ec2.CreateSecurityGroupOutput, error) { + return s.ec2.CreateSecurityGroup(request) +} + +func (s *awsSdkEC2) DeleteSecurityGroup(request *ec2.DeleteSecurityGroupInput) (*ec2.DeleteSecurityGroupOutput, error) { + return s.ec2.DeleteSecurityGroup(request) +} + +func (s *awsSdkEC2) AuthorizeSecurityGroupIngress(request *ec2.AuthorizeSecurityGroupIngressInput) (*ec2.AuthorizeSecurityGroupIngressOutput, error) { + return s.ec2.AuthorizeSecurityGroupIngress(request) +} + +func (s *awsSdkEC2) RevokeSecurityGroupIngress(request *ec2.RevokeSecurityGroupIngressInput) (*ec2.RevokeSecurityGroupIngressOutput, error) { + return s.ec2.RevokeSecurityGroupIngress(request) +} + +func (s *awsSdkEC2) CreateTags(request *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { + requestTime := time.Now() + resp, err := s.ec2.CreateTags(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("create_tags", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) DeleteTags(request *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { + requestTime := time.Now() + resp, err := s.ec2.DeleteTags(request) + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("create_tags", timeTaken, err) + return resp, err +} + +func (s *awsSdkEC2) DescribeRouteTables(request *ec2.DescribeRouteTablesInput) ([]*ec2.RouteTable, error) { + results := []*ec2.RouteTable{} + var nextToken *string + requestTime := time.Now() + for { + response, err := s.ec2.DescribeRouteTables(request) + if err != nil { + recordAWSMetric("describe_route_tables", 0, err) + return nil, fmt.Errorf("error listing AWS route tables: %q", err) + } + + results = append(results, response.RouteTables...) 
+ + nextToken = response.NextToken + if aws.StringValue(nextToken) == "" { + break + } + request.NextToken = nextToken + } + timeTaken := time.Since(requestTime).Seconds() + recordAWSMetric("describe_route_tables", timeTaken, nil) + return results, nil +} + +func (s *awsSdkEC2) CreateRoute(request *ec2.CreateRouteInput) (*ec2.CreateRouteOutput, error) { + return s.ec2.CreateRoute(request) +} + +func (s *awsSdkEC2) DeleteRoute(request *ec2.DeleteRouteInput) (*ec2.DeleteRouteOutput, error) { + return s.ec2.DeleteRoute(request) +} + +func (s *awsSdkEC2) ModifyInstanceAttribute(request *ec2.ModifyInstanceAttributeInput) (*ec2.ModifyInstanceAttributeOutput, error) { + return s.ec2.ModifyInstanceAttribute(request) +} + +func (s *awsSdkEC2) DescribeVpcs(request *ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error) { + return s.ec2.DescribeVpcs(request) +} + +func init() { + registerMetrics() + cloudprovider.RegisterCloudProvider(ProviderName, func(config io.Reader) (cloudprovider.Interface, error) { + cfg, err := readAWSCloudConfig(config) + if err != nil { + return nil, fmt.Errorf("unable to read AWS cloud provider config file: %v", err) + } + + if err = cfg.validateOverrides(); err != nil { + return nil, fmt.Errorf("unable to validate custom endpoint overrides: %v", err) + } + + metadata, err := newAWSSDKProvider(nil, cfg).Metadata() + if err != nil { + return nil, fmt.Errorf("error creating AWS metadata client: %q", err) + } + + regionName, _, err := getRegionFromMetadata(*cfg, metadata) + if err != nil { + return nil, err + } + + sess, err := session.NewSessionWithOptions(session.Options{ + Config: *aws.NewConfig().WithRegion(regionName).WithSTSRegionalEndpoint(endpoints.RegionalSTSEndpoint), + SharedConfigState: session.SharedConfigEnable, + }) + if err != nil { + return nil, fmt.Errorf("unable to initialize AWS session: %v", err) + } + + var creds *credentials.Credentials + if cfg.Global.RoleARN != "" { + klog.Infof("Using AWS assumed role %v", cfg.Global.RoleARN) + provider := &stscreds.AssumeRoleProvider{ + Client: sts.New(sess), + RoleARN: cfg.Global.RoleARN, + } + + creds = credentials.NewChainCredentials( + []credentials.Provider{ + &credentials.EnvProvider{}, + provider, + }) + } + + aws := newAWSSDKProvider(creds, cfg) + return newAWSCloud(*cfg, aws) + }) +} + +// readAWSCloudConfig reads an instance of AWSCloudConfig from config reader. +func readAWSCloudConfig(config io.Reader) (*CloudConfig, error) { + var cfg CloudConfig + var err error + + if config != nil { + err = gcfg.ReadInto(&cfg, config) + if err != nil { + return nil, err + } + } + + return &cfg, nil +} + +func updateConfigZone(cfg *CloudConfig, metadata EC2Metadata) error { + if cfg.Global.Zone == "" { + if metadata != nil { + klog.Info("Zone not specified in configuration file; querying AWS metadata service") + var err error + cfg.Global.Zone, err = getAvailabilityZone(metadata) + if err != nil { + return err + } + } + if cfg.Global.Zone == "" { + return fmt.Errorf("no zone specified in configuration file") + } + } + + return nil +} + +func getAvailabilityZone(metadata EC2Metadata) (string, error) { + return metadata.GetMetadata("placement/availability-zone") +} + +// Derives the region from a valid az name. 
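
readAWSCloudConfig above hands the reader to gcfg, which maps INI sections onto nested struct fields by name. A minimal sketch of that round trip, using a trimmed-down CloudConfig and made-up [Global] values:

    package main

    import (
    	"fmt"
    	"strings"

    	"gopkg.in/gcfg.v1"
    )

    // A trimmed-down stand-in for the CloudConfig type above.
    type CloudConfig struct {
    	Global struct {
    		Zone                string
    		KubernetesClusterID string
    	}
    }

    func main() {
    	ini := "[Global]\nZone = us-west-2a\nKubernetesClusterID = my-cluster\n"

    	var cfg CloudConfig
    	if err := gcfg.ReadInto(&cfg, strings.NewReader(ini)); err != nil {
    		panic(err)
    	}
    	fmt.Println(cfg.Global.Zone, cfg.Global.KubernetesClusterID)
    }
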
+// Returns an error if the az is known invalid (empty) +func azToRegion(az string) (string, error) { + if len(az) < 1 { + return "", fmt.Errorf("invalid (empty) AZ") + } + + r := regexp.MustCompile(`^([a-zA-Z]+-)+\d+`) + region := r.FindString(az) + if region == "" { + return "", fmt.Errorf("invalid AZ: %s", az) + } + + return region, nil +} + +// newAWSCloud creates a new instance of AWSCloud. +// AWSProvider and instanceId are primarily for tests +func newAWSCloud(cfg CloudConfig, awsServices Services) (*Cloud, error) { + // We have some state in the Cloud object - in particular the attaching map + // Log so that if we are building multiple Cloud objects, it is obvious! + klog.Infof("Building AWS cloudprovider") + + metadata, err := awsServices.Metadata() + if err != nil { + return nil, fmt.Errorf("error creating AWS metadata client: %q", err) + } + + regionName, zone, err := getRegionFromMetadata(cfg, metadata) + if err != nil { + return nil, err + } + + if !cfg.Global.DisableStrictZoneCheck { + if !isRegionValid(regionName, metadata) { + return nil, fmt.Errorf("not a valid AWS zone (unknown region): %s", zone) + } + } else { + klog.Warningf("Strict AWS zone checking is disabled. Proceeding with zone: %s", zone) + } + + ec2, err := awsServices.Compute(regionName) + if err != nil { + return nil, fmt.Errorf("error creating AWS EC2 client: %v", err) + } + + elb, err := awsServices.LoadBalancing(regionName) + if err != nil { + return nil, fmt.Errorf("error creating AWS ELB client: %v", err) + } + + elbv2, err := awsServices.LoadBalancingV2(regionName) + if err != nil { + return nil, fmt.Errorf("error creating AWS ELBV2 client: %v", err) + } + + asg, err := awsServices.Autoscaling(regionName) + if err != nil { + return nil, fmt.Errorf("error creating AWS autoscaling client: %v", err) + } + + kms, err := awsServices.KeyManagement(regionName) + if err != nil { + return nil, fmt.Errorf("error creating AWS key management client: %v", err) + } + + awsCloud := &Cloud{ + ec2: ec2, + elb: elb, + elbv2: elbv2, + asg: asg, + metadata: metadata, + kms: kms, + cfg: &cfg, + region: regionName, + + attaching: make(map[types.NodeName]map[mountDevice]EBSVolumeID), + deviceAllocators: make(map[types.NodeName]DeviceAllocator), + } + awsCloud.instanceCache.cloud = awsCloud + + tagged := cfg.Global.KubernetesClusterTag != "" || cfg.Global.KubernetesClusterID != "" + if cfg.Global.VPC != "" && (cfg.Global.SubnetID != "" || cfg.Global.RoleARN != "") && tagged { + // When the master is running on a different AWS account, cloud provider or on-premise + // build up a dummy instance and use the VPC from the nodes account + klog.Info("Master is configured to run on a different AWS account, different cloud provider or on-premises") + awsCloud.selfAWSInstance = &awsInstance{ + nodeName: "master-dummy", + vpcID: cfg.Global.VPC, + subnetID: cfg.Global.SubnetID, + } + awsCloud.vpcID = cfg.Global.VPC + } else { + selfAWSInstance, err := awsCloud.buildSelfAWSInstance() + if err != nil { + return nil, err + } + awsCloud.selfAWSInstance = selfAWSInstance + awsCloud.vpcID = selfAWSInstance.vpcID + } + + if cfg.Global.KubernetesClusterTag != "" || cfg.Global.KubernetesClusterID != "" { + if err := awsCloud.tagging.init(cfg.Global.KubernetesClusterTag, cfg.Global.KubernetesClusterID); err != nil { + return nil, err + } + } else { + // TODO: Clean up double-API query + info, err := awsCloud.selfAWSInstance.describeInstance() + if err != nil { + return nil, err + } + if err := awsCloud.tagging.initFromTags(info.Tags); err != nil { 
+ return nil, err + } + } + + if len(cfg.Global.NodeIPFamilies) == 0 { + cfg.Global.NodeIPFamilies = []string{"ipv4"} + } + klog.Infof("The following IP families will be added to nodes: %v", cfg.Global.NodeIPFamilies) + + return awsCloud, nil +} + +// isRegionValid accepts an AWS region name and returns if the region is a +// valid region known to the AWS SDK. Considers the region returned from the +// EC2 metadata service to be a valid region as it's only available on a host +// running in a valid AWS region. +func isRegionValid(region string, metadata EC2Metadata) bool { + // Does the AWS SDK know about the region? + for _, p := range endpoints.DefaultPartitions() { + for r := range p.Regions() { + if r == region { + return true + } + } + } + + // ap-northeast-3 is purposely excluded from the SDK because it + // requires an access request (for more details see): + // https://github.com/aws/aws-sdk-go/issues/1863 + if region == "ap-northeast-3" { + return true + } + + // Fallback to checking if the region matches the instance metadata region + // (ignoring any user overrides). This just accounts for running an old + // build of Kubernetes in a new region that wasn't compiled into the SDK + // when Kubernetes was built. + if az, err := getAvailabilityZone(metadata); err == nil { + if r, err := azToRegion(az); err == nil && region == r { + return true + } + } + + return false +} + +// Initialize passes a Kubernetes clientBuilder interface to the cloud provider +func (c *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, stop <-chan struct{}) { + c.clientBuilder = clientBuilder + c.kubeClient = clientBuilder.ClientOrDie("aws-cloud-provider") + c.eventBroadcaster = record.NewBroadcaster() + c.eventBroadcaster.StartStructuredLogging(0) + c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.kubeClient.CoreV1().Events("")}) + c.eventRecorder = c.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "aws-cloud-provider"}) +} + +// Clusters returns the list of clusters. +func (c *Cloud) Clusters() (cloudprovider.Clusters, bool) { + return nil, false +} + +// ProviderName returns the cloud provider ID. +func (c *Cloud) ProviderName() string { + return ProviderName +} + +// LoadBalancer returns an implementation of LoadBalancer for Amazon Web Services. +func (c *Cloud) LoadBalancer() (cloudprovider.LoadBalancer, bool) { + return c, true +} + +// Instances returns an implementation of Instances for Amazon Web Services. +func (c *Cloud) Instances() (cloudprovider.Instances, bool) { + return c, true +} + +// InstancesV2 returns an implementation of InstancesV2 for Amazon Web Services. +// TODO: implement ONLY for external cloud provider +func (c *Cloud) InstancesV2() (cloudprovider.InstancesV2, bool) { + return nil, false +} + +// Zones returns an implementation of Zones for Amazon Web Services. +func (c *Cloud) Zones() (cloudprovider.Zones, bool) { + return c, true +} + +// Routes returns an implementation of Routes for Amazon Web Services. +func (c *Cloud) Routes() (cloudprovider.Routes, bool) { + return c, true +} + +// HasClusterID returns true if the cluster has a clusterID +func (c *Cloud) HasClusterID() bool { + return len(c.tagging.clusterID()) > 0 +} + +// NodeAddresses is an implementation of Instances.NodeAddresses. 
+func (c *Cloud) NodeAddresses(ctx context.Context, name types.NodeName) ([]v1.NodeAddress, error) { + providerID, err := c.nodeNameToProviderID(name) + if err != nil { + return nil, fmt.Errorf("could not look up provider ID for node %q: %v", name, err) + } + return c.NodeAddressesByProviderID(ctx, string(providerID)) +} + +// extractIPv4NodeAddresses maps the instance information from EC2 to an array of NodeAddresses. +// This function will extract private and public IP addresses and their corresponding DNS names. +func extractIPv4NodeAddresses(instance *ec2.Instance) ([]v1.NodeAddress, error) { + // Not clear if the order matters here, but we might as well indicate a sensible preference order + + if instance == nil { + return nil, fmt.Errorf("nil instance passed to extractNodeAddresses") + } + + addresses := []v1.NodeAddress{} + + // handle internal network interfaces + for _, networkInterface := range instance.NetworkInterfaces { + // skip network interfaces that are not currently in use + if aws.StringValue(networkInterface.Status) != ec2.NetworkInterfaceStatusInUse { + continue + } + + for _, internalIP := range networkInterface.PrivateIpAddresses { + if ipAddress := aws.StringValue(internalIP.PrivateIpAddress); ipAddress != "" { + ip := netutils.ParseIPSloppy(ipAddress) + if ip == nil { + return nil, fmt.Errorf("EC2 instance had invalid private address: %s (%q)", aws.StringValue(instance.InstanceId), ipAddress) + } + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeInternalIP, Address: ip.String()}) + } + } + } + + // TODO: Other IP addresses (multiple ips)? + publicIPAddress := aws.StringValue(instance.PublicIpAddress) + if publicIPAddress != "" { + ip := netutils.ParseIPSloppy(publicIPAddress) + if ip == nil { + return nil, fmt.Errorf("EC2 instance had invalid public address: %s (%s)", aws.StringValue(instance.InstanceId), publicIPAddress) + } + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeExternalIP, Address: ip.String()}) + } + + privateDNSName := aws.StringValue(instance.PrivateDnsName) + if privateDNSName != "" { + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeInternalDNS, Address: privateDNSName}) + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeHostName, Address: privateDNSName}) + } + + publicDNSName := aws.StringValue(instance.PublicDnsName) + if publicDNSName != "" { + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeExternalDNS, Address: publicDNSName}) + } + + return addresses, nil +} + +// extractIPv6NodeAddresses maps the instance information from EC2 to an array of NodeAddresses +// All IPv6 addresses are considered internal even if they are publicly routable. There are no instance DNS names associated with IPv6. 
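+// As an illustrative sketch (hypothetical values), an instance whose only
+// in-use ENI reports Ipv6Addresses ["2600:1f14:0:1::1"] yields
+// []v1.NodeAddress{{Type: v1.NodeInternalIP, Address: "2600:1f14:0:1::1"}}.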
+func extractIPv6NodeAddresses(instance *ec2.Instance) ([]v1.NodeAddress, error) { + // Not clear if the order matters here, but we might as well indicate a sensible preference order + + if instance == nil { + return nil, fmt.Errorf("nil instance passed to extractNodeAddresses") + } + + addresses := []v1.NodeAddress{} + + // handle internal network interfaces with IPv6 addresses + for _, networkInterface := range instance.NetworkInterfaces { + // skip network interfaces that are not currently in use + if aws.StringValue(networkInterface.Status) != ec2.NetworkInterfaceStatusInUse || len(networkInterface.Ipv6Addresses) == 0 { + continue + } + + // return only the "first" address for each ENI + internalIPv6 := aws.StringValue(networkInterface.Ipv6Addresses[0].Ipv6Address) + ip := net.ParseIP(internalIPv6) + if ip == nil { + return nil, fmt.Errorf("EC2 instance had invalid IPv6 address: %s (%q)", aws.StringValue(instance.InstanceId), internalIPv6) + } + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeInternalIP, Address: ip.String()}) + } + + return addresses, nil +} + +// getNodeAddressesForFargateNode generates list of Node addresses for Fargate node. +func getNodeAddressesForFargateNode(privateDNSName, privateIP string) []v1.NodeAddress { + addresses := []v1.NodeAddress{} + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeInternalIP, Address: privateIP}) + if privateDNSName != "" { + addresses = append(addresses, v1.NodeAddress{Type: v1.NodeInternalDNS, Address: privateDNSName}) + } + return addresses +} + +// NodeAddressesByProviderID returns the node addresses of an instances with the specified unique providerID +// This method will not be called from the node that is requesting this ID. i.e. metadata service +// and other local methods cannot be used here +func (c *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID string) ([]v1.NodeAddress, error) { + instanceID, err := KubernetesInstanceID(providerID).MapToAWSInstanceID() + if err != nil { + return nil, err + } + + if isFargateNode(string(instanceID)) { + eni, err := c.describeNetworkInterfaces(string(instanceID)) + if eni == nil || err != nil { + return nil, err + } + return getNodeAddressesForFargateNode(aws.StringValue(eni.PrivateDnsName), aws.StringValue(eni.PrivateIpAddress)), nil + } + + instance, err := describeInstance(c.ec2, instanceID) + if err != nil { + return nil, err + } + + var addresses []v1.NodeAddress + + for _, family := range c.cfg.Global.NodeIPFamilies { + switch family { + case "ipv4": + ipv4addr, err := extractIPv4NodeAddresses(instance) + if err != nil { + return nil, err + } + addresses = append(addresses, ipv4addr...) + case "ipv6": + ipv6addr, err := extractIPv6NodeAddresses(instance) + if err != nil { + return nil, err + } + addresses = append(addresses, ipv6addr...) + } + } + + return addresses, nil +} + +// InstanceExistsByProviderID returns true if the instance with the given provider id still exists. +// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager. 
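+// Note that a terminated instance counts as non-existent here: the lookup
+// below returns (false, nil) for it so the node object can be removed.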
+func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
+ instanceID, err := KubernetesInstanceID(providerID).MapToAWSInstanceID()
+ if err != nil {
+ return false, err
+ }
+
+ if isFargateNode(string(instanceID)) {
+ eni, err := c.describeNetworkInterfaces(string(instanceID))
+ return eni != nil, err
+ }
+
+ request := &ec2.DescribeInstancesInput{
+ InstanceIds: []*string{instanceID.awsString()},
+ }
+
+ instances, err := c.ec2.DescribeInstances(request)
+ if err != nil {
+ // if err is InstanceNotFound, return false with no error
+ if isAWSErrorInstanceNotFound(err) {
+ return false, nil
+ }
+ return false, err
+ }
+ if len(instances) == 0 {
+ return false, nil
+ }
+ if len(instances) > 1 {
+ return false, fmt.Errorf("multiple instances found for instance: %s", instanceID)
+ }
+
+ state := instances[0].State.Name
+ if *state == ec2.InstanceStateNameTerminated {
+ klog.Warningf("the instance %s is terminated", instanceID)
+ return false, nil
+ }
+
+ return true, nil
+}
+
+// InstanceShutdownByProviderID returns true if the instance is in a safe state to detach volumes
+func (c *Cloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
+ instanceID, err := KubernetesInstanceID(providerID).MapToAWSInstanceID()
+ if err != nil {
+ return false, err
+ }
+
+ if isFargateNode(string(instanceID)) {
+ eni, err := c.describeNetworkInterfaces(string(instanceID))
+ return eni != nil, err
+ }
+
+ request := &ec2.DescribeInstancesInput{
+ InstanceIds: []*string{instanceID.awsString()},
+ }
+
+ instances, err := c.ec2.DescribeInstances(request)
+ if err != nil {
+ return false, err
+ }
+ if len(instances) == 0 {
+ klog.Warningf("the instance %s does not exist anymore", providerID)
+ // returns false, because otherwise the node is not deleted from the cluster
+ // false means that it will continue to check InstanceExistsByProviderID
+ return false, nil
+ }
+ if len(instances) > 1 {
+ return false, fmt.Errorf("multiple instances found for instance: %s", instanceID)
+ }
+
+ instance := instances[0]
+ if instance.State != nil {
+ state := aws.StringValue(instance.State.Name)
+ // valid state for detaching volumes
+ if state == ec2.InstanceStateNameStopped {
+ return true, nil
+ }
+ }
+ return false, nil
+}
+
+// InstanceID returns the cloud provider ID of the node with the specified nodeName.
+func (c *Cloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
+ // In the future it is possible to also return an endpoint as:
+ // <endpoint>/<zone>/<instanceid>
+ if c.selfAWSInstance.nodeName == nodeName {
+ return "/" + c.selfAWSInstance.availabilityZone + "/" + c.selfAWSInstance.awsID, nil
+ }
+ inst, err := c.getInstanceByNodeName(nodeName)
+ if err != nil {
+ if err == cloudprovider.InstanceNotFound {
+ // The Instances interface requires that we return InstanceNotFound (without wrapping)
+ return "", err
+ }
+ return "", fmt.Errorf("getInstanceByNodeName failed for %q with %q", nodeName, err)
+ }
+ return "/" + aws.StringValue(inst.Placement.AvailabilityZone) + "/" + aws.StringValue(inst.InstanceId), nil
+}
+
+// InstanceTypeByProviderID returns the cloudprovider instance type of the node with the specified unique providerID
+// This method will not be called from the node that is requesting this ID. i.e. 
metadata service +// and other local methods cannot be used here +func (c *Cloud) InstanceTypeByProviderID(ctx context.Context, providerID string) (string, error) { + instanceID, err := KubernetesInstanceID(providerID).MapToAWSInstanceID() + if err != nil { + return "", err + } + + if isFargateNode(string(instanceID)) { + return "", nil + } + + instance, err := describeInstance(c.ec2, instanceID) + if err != nil { + return "", err + } + + return aws.StringValue(instance.InstanceType), nil +} + +// InstanceType returns the type of the node with the specified nodeName. +func (c *Cloud) InstanceType(ctx context.Context, nodeName types.NodeName) (string, error) { + if c.selfAWSInstance.nodeName == nodeName { + return c.selfAWSInstance.instanceType, nil + } + inst, err := c.getInstanceByNodeName(nodeName) + if err != nil { + return "", fmt.Errorf("getInstanceByNodeName failed for %q with %q", nodeName, err) + } + return aws.StringValue(inst.InstanceType), nil +} + +// GetCandidateZonesForDynamicVolume retrieves a list of all the zones in which nodes are running +// It currently involves querying all instances +func (c *Cloud) GetCandidateZonesForDynamicVolume() (sets.String, error) { + // We don't currently cache this; it is currently used only in volume + // creation which is expected to be a comparatively rare occurrence. + + // TODO: Caching / expose v1.Nodes to the cloud provider? + // TODO: We could also query for subnets, I think + + // Note: It is more efficient to call the EC2 API twice with different tag + // filters than to call it once with a tag filter that results in a logical + // OR. For really large clusters the logical OR will result in EC2 API rate + // limiting. + instances := []*ec2.Instance{} + + baseFilters := []*ec2.Filter{newEc2Filter("instance-state-name", "running")} + + filters := c.tagging.addFilters(baseFilters) + di, err := c.describeInstances(filters) + if err != nil { + return nil, err + } + + instances = append(instances, di...) + + if c.tagging.usesLegacyTags { + filters = c.tagging.addLegacyFilters(baseFilters) + di, err = c.describeInstances(filters) + if err != nil { + return nil, err + } + + instances = append(instances, di...) + } + + if len(instances) == 0 { + return nil, fmt.Errorf("no instances returned") + } + + zones := sets.NewString() + + for _, instance := range instances { + // We skip over master nodes, if the installation tool labels them with one of the well-known master labels + // This avoids creating a volume in a zone where only the master is running - e.g. #34583 + // This is a short-term workaround until the scheduler takes care of zone selection + master := false + for _, tag := range instance.Tags { + tagKey := aws.StringValue(tag.Key) + if awsTagNameMasterRoles.Has(tagKey) { + master = true + } + } + + if master { + klog.V(4).Infof("Ignoring master instance %q in zone discovery", aws.StringValue(instance.InstanceId)) + continue + } + + if instance.Placement != nil { + zone := aws.StringValue(instance.Placement.AvailabilityZone) + zones.Insert(zone) + } + } + + klog.V(2).Infof("Found instances in zones %s", zones) + return zones, nil +} + +// GetZone implements Zones.GetZone +func (c *Cloud) GetZone(ctx context.Context) (cloudprovider.Zone, error) { + return cloudprovider.Zone{ + FailureDomain: c.selfAWSInstance.availabilityZone, + Region: c.region, + }, nil +} + +// GetZoneByProviderID implements Zones.GetZoneByProviderID +// This is particularly useful in external cloud providers where the kubelet +// does not initialize node data. 
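+// For a hypothetical instance placed in "us-west-2a" with the cloud running
+// in region "us-west-2", the result is
+// cloudprovider.Zone{FailureDomain: "us-west-2a", Region: "us-west-2"}.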
+func (c *Cloud) GetZoneByProviderID(ctx context.Context, providerID string) (cloudprovider.Zone, error) { + instanceID, err := KubernetesInstanceID(providerID).MapToAWSInstanceID() + if err != nil { + return cloudprovider.Zone{}, err + } + + if isFargateNode(string(instanceID)) { + eni, err := c.describeNetworkInterfaces(string(instanceID)) + if eni == nil || err != nil { + return cloudprovider.Zone{}, err + } + return cloudprovider.Zone{ + FailureDomain: *eni.AvailabilityZone, + Region: c.region, + }, nil + } + + instance, err := c.getInstanceByID(string(instanceID)) + if err != nil { + return cloudprovider.Zone{}, err + } + + zone := cloudprovider.Zone{ + FailureDomain: *(instance.Placement.AvailabilityZone), + Region: c.region, + } + + return zone, nil +} + +// GetZoneByNodeName implements Zones.GetZoneByNodeName +// This is particularly useful in external cloud providers where the kubelet +// does not initialize node data. +func (c *Cloud) GetZoneByNodeName(ctx context.Context, nodeName types.NodeName) (cloudprovider.Zone, error) { + instance, err := c.getInstanceByNodeName(nodeName) + if err != nil { + return cloudprovider.Zone{}, err + } + zone := cloudprovider.Zone{ + FailureDomain: *(instance.Placement.AvailabilityZone), + Region: c.region, + } + + return zone, nil + +} + +func isAWSErrorInstanceNotFound(err error) bool { + if err == nil { + return false + } + + if awsError, ok := err.(awserr.Error); ok { + if awsError.Code() == ec2.UnsuccessfulInstanceCreditSpecificationErrorCodeInvalidInstanceIdNotFound { + return true + } + } + + return false +} + +// Used to represent a mount device for attaching an EBS volume +// This should be stored as a single letter (i.e. c, not sdc or /dev/sdc) +type mountDevice string + +type awsInstance struct { + ec2 EC2 + + // id in AWS + awsID string + + // node name in k8s + nodeName types.NodeName + + // availability zone the instance resides in + availabilityZone string + + // ID of VPC the instance resides in + vpcID string + + // ID of subnet the instance resides in + subnetID string + + // instance type + instanceType string +} + +// newAWSInstance creates a new awsInstance object +func newAWSInstance(ec2Service EC2, instance *ec2.Instance) *awsInstance { + az := "" + if instance.Placement != nil { + az = aws.StringValue(instance.Placement.AvailabilityZone) + } + self := &awsInstance{ + ec2: ec2Service, + awsID: aws.StringValue(instance.InstanceId), + nodeName: mapInstanceToNodeName(instance), + availabilityZone: az, + instanceType: aws.StringValue(instance.InstanceType), + vpcID: aws.StringValue(instance.VpcId), + subnetID: aws.StringValue(instance.SubnetId), + } + + return self +} + +// Gets the full information about this instance from the EC2 API +func (i *awsInstance) describeInstance() (*ec2.Instance, error) { + return describeInstance(i.ec2, InstanceID(i.awsID)) +} + +// Gets the mountDevice already assigned to the volume, or assigns an unused mountDevice. +// If the volume is already assigned, this will return the existing mountDevice with alreadyAttached=true. +// Otherwise the mountDevice is assigned by finding the first available mountDevice, and it is returned with alreadyAttached=false. 
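+// Device names are tracked by their suffix: a hypothetical attachment at
+// /dev/xvdba is recorded as mountDevice("ba").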
+func (c *Cloud) getMountDevice(
+ i *awsInstance,
+ info *ec2.Instance,
+ volumeID EBSVolumeID,
+ assign bool) (assigned mountDevice, alreadyAttached bool, err error) {
+
+ deviceMappings := map[mountDevice]EBSVolumeID{}
+ volumeStatus := map[EBSVolumeID]string{} // for better logging of volume status
+ for _, blockDevice := range info.BlockDeviceMappings {
+ name := aws.StringValue(blockDevice.DeviceName)
+ name = strings.TrimPrefix(name, "/dev/sd")
+ name = strings.TrimPrefix(name, "/dev/xvd")
+ if len(name) < 1 || len(name) > 2 {
+ klog.Warningf("Unexpected EBS DeviceName: %q", aws.StringValue(blockDevice.DeviceName))
+ }
+ if blockDevice.Ebs != nil && blockDevice.Ebs.VolumeId != nil {
+ volumeStatus[EBSVolumeID(*blockDevice.Ebs.VolumeId)] = aws.StringValue(blockDevice.Ebs.Status)
+ }
+
+ deviceMappings[mountDevice(name)] = EBSVolumeID(aws.StringValue(blockDevice.Ebs.VolumeId))
+ }
+
+ // We lock to prevent concurrent mounts from conflicting
+ // We may still conflict if someone calls the API concurrently,
+ // but the AWS API will then fail one of the two attach operations
+ c.attachingMutex.Lock()
+ defer c.attachingMutex.Unlock()
+
+ for mountDevice, volume := range c.attaching[i.nodeName] {
+ deviceMappings[mountDevice] = volume
+ }
+
+ // Check to see if this volume is already assigned a device on this machine
+ for mountDevice, mappingVolumeID := range deviceMappings {
+ if volumeID == mappingVolumeID {
+ if assign {
+ // DescribeInstances shows the volume as attached / detaching, while Kubernetes
+ // cloud provider thinks it's detached.
+ // This can happen when the volume has just been detached from the same node
+ // and the AWS API returns stale data in this DescribeInstances call ("eventual consistency").
+ // Fail the attachment and let the A/D controller retry in a while, hoping that
+ // the AWS API returns a consistent result next time (i.e. the volume is detached).
+ status := volumeStatus[mappingVolumeID]
+ klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
+ }
+ return mountDevice, true, nil
+ }
+ }
+
+ if !assign {
+ return mountDevice(""), false, nil
+ }
+
+ // Find the next unused device name
+ deviceAllocator := c.deviceAllocators[i.nodeName]
+ if deviceAllocator == nil {
+ // we want device names with two significant characters, starting with /dev/xvdbb
+ // the allowed range is /dev/xvd[b-c][a-z]
+ // http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html
+ deviceAllocator = NewDeviceAllocator()
+ c.deviceAllocators[i.nodeName] = deviceAllocator
+ }
+ // We need to lock deviceAllocator to prevent possible race with Deprioritize function
+ deviceAllocator.Lock()
+ defer deviceAllocator.Unlock()
+
+ chosen, err := deviceAllocator.GetNext(deviceMappings)
+ if err != nil {
+ klog.Warningf("Could not assign a mount device. 
mappings=%v, error: %v", deviceMappings, err) + return "", false, fmt.Errorf("too many EBS volumes attached to node %s", i.nodeName) + } + + attaching := c.attaching[i.nodeName] + if attaching == nil { + attaching = make(map[mountDevice]EBSVolumeID) + c.attaching[i.nodeName] = attaching + } + attaching[chosen] = volumeID + klog.V(2).Infof("Assigned mount device %s -> volume %s", chosen, volumeID) + + return chosen, false, nil +} + +// endAttaching removes the entry from the "attachments in progress" map +// It returns true if it was found (and removed), false otherwise +func (c *Cloud) endAttaching(i *awsInstance, volumeID EBSVolumeID, mountDevice mountDevice) bool { + c.attachingMutex.Lock() + defer c.attachingMutex.Unlock() + + existingVolumeID, found := c.attaching[i.nodeName][mountDevice] + if !found { + return false + } + if volumeID != existingVolumeID { + // This actually can happen, because getMountDevice combines the attaching map with the volumes + // attached to the instance (as reported by the EC2 API). So if endAttaching comes after + // a 10 second poll delay, we might well have had a concurrent request to allocate a mountpoint, + // which because we allocate sequentially is _very_ likely to get the immediately freed volume + klog.Infof("endAttaching on device %q assigned to different volume: %q vs %q", mountDevice, volumeID, existingVolumeID) + return false + } + klog.V(2).Infof("Releasing in-process attachment entry: %s -> volume %s", mountDevice, volumeID) + delete(c.attaching[i.nodeName], mountDevice) + return true +} + +type awsDisk struct { + ec2 EC2 + + // Name in k8s + name KubernetesVolumeID + // id in AWS + awsID EBSVolumeID +} + +func newAWSDisk(aws *Cloud, name KubernetesVolumeID) (*awsDisk, error) { + awsID, err := name.MapToAWSVolumeID() + if err != nil { + return nil, err + } + disk := &awsDisk{ec2: aws.ec2, name: name, awsID: awsID} + return disk, nil +} + +// Helper function for describeVolume callers. 
Tries to convert the given error to an AWS error
+// and returns true if the AWS error is "InvalidVolume.NotFound", false otherwise
+func isAWSErrorVolumeNotFound(err error) bool {
+ if err != nil {
+ if awsError, ok := err.(awserr.Error); ok {
+ // https://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
+ if awsError.Code() == "InvalidVolume.NotFound" {
+ return true
+ }
+ }
+ }
+ return false
+}
+
+// Gets the full information about this volume from the EC2 API
+func (d *awsDisk) describeVolume() (*ec2.Volume, error) {
+ volumeID := d.awsID
+
+ request := &ec2.DescribeVolumesInput{
+ VolumeIds: []*string{volumeID.awsString()},
+ }
+
+ volumes, err := d.ec2.DescribeVolumes(request)
+ if err != nil {
+ return nil, err
+ }
+ if len(volumes) == 0 {
+ return nil, fmt.Errorf("no volumes found")
+ }
+ if len(volumes) > 1 {
+ return nil, fmt.Errorf("multiple volumes found")
+ }
+ return volumes[0], nil
+}
+
+func (d *awsDisk) describeVolumeModification() (*ec2.VolumeModification, error) {
+ volumeID := d.awsID
+ request := &ec2.DescribeVolumesModificationsInput{
+ VolumeIds: []*string{volumeID.awsString()},
+ }
+ volumeMods, err := d.ec2.DescribeVolumeModifications(request)
+
+ if err != nil {
+ return nil, fmt.Errorf("error describing volume modification %s with %v", volumeID, err)
+ }
+
+ if len(volumeMods) == 0 {
+ return nil, fmt.Errorf("no volume modifications found for %s", volumeID)
+ }
+ lastIndex := len(volumeMods) - 1
+ return volumeMods[lastIndex], nil
+}
+
+func (d *awsDisk) modifyVolume(requestGiB int64) (int64, error) {
+ volumeID := d.awsID
+
+ request := &ec2.ModifyVolumeInput{
+ VolumeId: volumeID.awsString(),
+ Size: aws.Int64(requestGiB),
+ }
+ output, err := d.ec2.ModifyVolume(request)
+ if err != nil {
+ modifyError := fmt.Errorf("AWS modifyVolume failed for %s with %v", volumeID, err)
+ return requestGiB, modifyError
+ }
+
+ volumeModification := output.VolumeModification
+
+ if aws.StringValue(volumeModification.ModificationState) == ec2.VolumeModificationStateCompleted {
+ return aws.Int64Value(volumeModification.TargetSize), nil
+ }
+
+ backoff := wait.Backoff{
+ Duration: 1 * time.Second,
+ Factor: 2,
+ Steps: 10,
+ }
+
+ checkForResize := func() (bool, error) {
+ volumeModification, err := d.describeVolumeModification()
+
+ if err != nil {
+ return false, err
+ }
+
+ // According to https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring_mods.html
+ // Size changes usually take a few seconds to complete and take effect after a volume is in the Optimizing state.
+ if aws.StringValue(volumeModification.ModificationState) == ec2.VolumeModificationStateOptimizing {
+ return true, nil
+ }
+ return false, nil
+ }
+ waitWithErr := wait.ExponentialBackoff(backoff, checkForResize)
+ return requestGiB, waitWithErr
+}
+
+// applyUnSchedulableTaint applies an unschedulable taint to a node after verifying
+// that the node has become unusable because of volumes getting stuck in the attaching state. 
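+// Assuming the nodeWithImpairedVolumes key resolves to
+// "NodeWithImpairedVolumes" (its definition is not shown in this patch),
+// the applied taint would be equivalent to:
+//
+// kubectl taint nodes <node> NodeWithImpairedVolumes=true:NoSchedule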
+func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string) { + node, fetchErr := c.kubeClient.CoreV1().Nodes().Get(context.TODO(), string(nodeName), metav1.GetOptions{}) + if fetchErr != nil { + klog.Errorf("Error fetching node %s with %v", nodeName, fetchErr) + return + } + + taint := &v1.Taint{ + Key: nodeWithImpairedVolumes, + Value: "true", + Effect: v1.TaintEffectNoSchedule, + } + err := nodehelpers.AddOrUpdateTaintOnNode(c.kubeClient, string(nodeName), taint) + if err != nil { + klog.Errorf("Error applying taint to node %s with error %v", nodeName, err) + return + } + c.eventRecorder.Eventf(node, v1.EventTypeWarning, volumeAttachmentStuck, reason) +} + +// waitForAttachmentStatus polls until the attachment status is the expected value +// On success, it returns the last attachment state. +func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string, alreadyAttached bool) (*ec2.VolumeAttachment, error) { + backoff := wait.Backoff{ + Duration: volumeAttachmentStatusPollDelay, + Factor: volumeAttachmentStatusFactor, + Steps: volumeAttachmentStatusSteps, + } + + // Because of rate limiting, we often see errors from describeVolume. + // Or AWS eventual consistency returns unexpected data. + // So we tolerate a limited number of failures. + // But once we see more than 10 errors in a row, we return the error. + errorCount := 0 + + // Attach/detach usually takes time. It does not make sense to start + // polling DescribeVolumes before some initial delay to let AWS + // process the request. + time.Sleep(getInitialAttachDetachDelay(status)) + + var attachment *ec2.VolumeAttachment + + err := wait.ExponentialBackoff(backoff, func() (bool, error) { + info, err := d.describeVolume() + if err != nil { + // The VolumeNotFound error is special -- we don't need to wait for it to repeat + if isAWSErrorVolumeNotFound(err) { + if status == volumeDetachedStatus { + // The disk doesn't exist, assume it's detached, log warning and stop waiting + klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID) + stateStr := "detached" + attachment = &ec2.VolumeAttachment{ + State: &stateStr, + } + return true, nil + } + if status == volumeAttachedStatus { + // The disk doesn't exist, complain, give up waiting and report error + klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID) + return false, err + } + } + errorCount++ + if errorCount > volumeAttachmentStatusConsecutiveErrorLimit { + // report the error + return false, err + } + + klog.Warningf("Ignoring error from describe volume for volume %q; will retry: %q", d.awsID, err) + return false, nil + } + + if len(info.Attachments) > 1 { + // Shouldn't happen; log so we know if it is + klog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info) + } + attachmentStatus := "" + for _, a := range info.Attachments { + if attachmentStatus != "" { + // Shouldn't happen; log so we know if it is + klog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info) + } + if a.State != nil { + attachment = a + attachmentStatus = *a.State + } else { + // Shouldn't happen; log so we know if it is + klog.Warningf("Ignoring nil attachment state for volume %q: %v", d.awsID, a) + } + } + if attachmentStatus == "" { + attachmentStatus = volumeDetachedStatus + } + if attachment != nil { + // AWS eventual consistency can go back in time. 
+ // For example, we're waiting for a volume to be attached as /dev/xvdba, but AWS can tell us it's
+ // attached as /dev/xvdbb, where it was attached before and it was already detached.
+ // Retry a couple of times, hoping AWS starts reporting the right status.
+ device := aws.StringValue(attachment.Device)
+ if expectedDevice != "" && device != "" && device != expectedDevice {
+ klog.Warningf("Expected device %s %s for volume %s, but found device %s %s", expectedDevice, status, d.name, device, attachmentStatus)
+ errorCount++
+ if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
+ // report the error
+ return false, fmt.Errorf("attachment of disk %q failed: requested device %q but found %q", d.name, expectedDevice, device)
+ }
+ return false, nil
+ }
+ instanceID := aws.StringValue(attachment.InstanceId)
+ if expectedInstance != "" && instanceID != "" && instanceID != expectedInstance {
+ klog.Warningf("Expected instance %s/%s for volume %s, but found instance %s/%s", expectedInstance, status, d.name, instanceID, attachmentStatus)
+ errorCount++
+ if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
+ // report the error
+ return false, fmt.Errorf("attachment of disk %q failed: requested instance %q but found %q", d.name, expectedInstance, instanceID)
+ }
+ return false, nil
+ }
+ }
+
+ // if we expected volume to be attached and it was reported as already attached via DescribeInstance call
+ // but DescribeVolume told us volume is detached, we will short-circuit this long wait loop and return error
+ // so that AttachDisk can be retried without waiting for 20 minutes.
+ if (status == volumeAttachedStatus) && alreadyAttached && (attachmentStatus != status) {
+ return false, fmt.Errorf("attachment of disk %q failed, expected device to be attached but was %s", d.name, attachmentStatus)
+ }
+
+ if attachmentStatus == status {
+ // Attachment is in requested state, finish waiting
+ return true, nil
+ }
+ // continue waiting
+ errorCount = 0
+ klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
+ return false, nil
+ })
+ return attachment, err
+}
+
+// Deletes the EBS disk
+func (d *awsDisk) deleteVolume() (bool, error) {
+ request := &ec2.DeleteVolumeInput{VolumeId: d.awsID.awsString()}
+ _, err := d.ec2.DeleteVolume(request)
+ if err != nil {
+ if isAWSErrorVolumeNotFound(err) {
+ return false, nil
+ }
+ if awsError, ok := err.(awserr.Error); ok {
+ if awsError.Code() == "VolumeInUse" {
+ return false, volerr.NewDeletedVolumeInUseError(err.Error())
+ }
+ }
+ return false, fmt.Errorf("error deleting EBS volume %q: %q", d.awsID, err)
+ }
+ return true, nil
+}
+
+// Builds the awsInstance for the EC2 instance on which we are running.
+// This is called when the AWSCloud is initialized, and should not be called otherwise (because the awsInstance for the local instance is a singleton with drive mapping state)
+func (c *Cloud) buildSelfAWSInstance() (*awsInstance, error) {
+ if c.selfAWSInstance != nil {
+ panic("do not call buildSelfAWSInstance directly")
+ }
+ instanceID, err := c.metadata.GetMetadata("instance-id")
+ if err != nil {
+ return nil, fmt.Errorf("error fetching instance-id from ec2 metadata service: %q", err)
+ }
+
+ // We want to fetch the hostname via the EC2 metadata service
+ // (`GetMetadata("local-hostname")`): But see #11543 - we need to use
+ // the EC2 API to get the privateDnsName in case of a private DNS zone
+ // e.g. mydomain.io, because the metadata service returns the wrong
+ // hostname. 
Once we're doing that, we might as well get all our + // information from the instance returned by the EC2 API - it is a + // single API call to get all the information, and it means we don't + // have two code paths. + instance, err := c.getInstanceByID(instanceID) + if err != nil { + return nil, fmt.Errorf("error finding instance %s: %q", instanceID, err) + } + return newAWSInstance(c.ec2, instance), nil +} + +// wrapAttachError wraps the error returned by an AttachVolume request with +// additional information, if needed and possible. +func wrapAttachError(err error, disk *awsDisk, instance string) error { + if awsError, ok := err.(awserr.Error); ok { + if awsError.Code() == "VolumeInUse" { + info, err := disk.describeVolume() + if err != nil { + klog.Errorf("Error describing volume %q: %q", disk.awsID, err) + } else { + for _, a := range info.Attachments { + if disk.awsID != EBSVolumeID(aws.StringValue(a.VolumeId)) { + klog.Warningf("Expected to get attachment info of volume %q but instead got info of %q", disk.awsID, aws.StringValue(a.VolumeId)) + } else if aws.StringValue(a.State) == "attached" { + return fmt.Errorf("error attaching EBS volume %q to instance %q: %q. The volume is currently attached to instance %q", disk.awsID, instance, awsError, aws.StringValue(a.InstanceId)) + } + } + } + } + } + return fmt.Errorf("error attaching EBS volume %q to instance %q: %q", disk.awsID, instance, err) +} + +// AttachDisk implements Volumes.AttachDisk +func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) (string, error) { + disk, err := newAWSDisk(c, diskName) + if err != nil { + return "", err + } + + awsInstance, info, err := c.getFullInstance(nodeName) + if err != nil { + return "", fmt.Errorf("error finding instance %s: %q", nodeName, err) + } + + // mountDevice will hold the device where we should try to attach the disk + var mountDevice mountDevice + // alreadyAttached is true if we have already called AttachVolume on this disk + var alreadyAttached bool + + // attachEnded is set to true if the attach operation completed + // (successfully or not), and is thus no longer in progress + attachEnded := false + defer func() { + if attachEnded { + if !c.endAttaching(awsInstance, disk.awsID, mountDevice) { + klog.Errorf("endAttaching called for disk %q when attach not in progress", disk.awsID) + } + } + }() + + mountDevice, alreadyAttached, err = c.getMountDevice(awsInstance, info, disk.awsID, true) + if err != nil { + return "", err + } + + // Inside the instance, the mountpoint always looks like /dev/xvdX (?) + hostDevice := "/dev/xvd" + string(mountDevice) + // We are using xvd names (so we are HVM only) + // See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html + ec2Device := "/dev/xvd" + string(mountDevice) + + if !alreadyAttached { + available, err := c.checkIfAvailable(disk, "attaching", awsInstance.awsID) + if err != nil { + klog.Error(err) + } + + if !available { + attachEnded = true + return "", err + } + request := &ec2.AttachVolumeInput{ + Device: aws.String(ec2Device), + InstanceId: aws.String(awsInstance.awsID), + VolumeId: disk.awsID.awsString(), + } + + attachResponse, err := c.ec2.AttachVolume(request) + if err != nil { + attachEnded = true + // TODO: Check if the volume was concurrently attached? 
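+ // wrapAttachError inspects VolumeInUse failures and, where possible,
+ // names the instance currently holding the volume in the returned error.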
+ return "", wrapAttachError(err, disk, awsInstance.awsID) + } + if da, ok := c.deviceAllocators[awsInstance.nodeName]; ok { + da.Deprioritize(mountDevice) + } + klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse) + } + + attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device, alreadyAttached) + + if err != nil { + if err == wait.ErrWaitTimeout { + c.applyUnSchedulableTaint(nodeName, "Volume stuck in attaching state - node needs reboot to fix impaired state.") + } + return "", err + } + + // The attach operation has finished + attachEnded = true + + // Double check the attachment to be 100% sure we attached the correct volume at the correct mountpoint + // It could happen otherwise that we see the volume attached from a previous/separate AttachVolume call, + // which could theoretically be against a different device (or even instance). + if attachment == nil { + // Impossible? + return "", fmt.Errorf("unexpected state: attachment nil after attached %q to %q", diskName, nodeName) + } + if ec2Device != aws.StringValue(attachment.Device) { + // Already checked in waitForAttachmentStatus(), but just to be sure... + return "", fmt.Errorf("disk attachment of %q to %q failed: requested device %q but found %q", diskName, nodeName, ec2Device, aws.StringValue(attachment.Device)) + } + if awsInstance.awsID != aws.StringValue(attachment.InstanceId) { + return "", fmt.Errorf("disk attachment of %q to %q failed: requested instance %q but found %q", diskName, nodeName, awsInstance.awsID, aws.StringValue(attachment.InstanceId)) + } + + return hostDevice, nil +} + +// DetachDisk implements Volumes.DetachDisk +func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) (string, error) { + diskInfo, attached, err := c.checkIfAttachedToNode(diskName, nodeName) + if err != nil { + if isAWSErrorVolumeNotFound(err) { + // Someone deleted the volume being detached; complain, but do nothing else and return success + klog.Warningf("DetachDisk %s called for node %s but volume does not exist; assuming the volume is detached", diskName, nodeName) + return "", nil + } + + return "", err + } + + if !attached && diskInfo.ec2Instance != nil { + klog.Warningf("DetachDisk %s called for node %s but volume is attached to node %s", diskName, nodeName, diskInfo.nodeName) + return "", nil + } + + if !attached { + return "", nil + } + + awsInstance := newAWSInstance(c.ec2, diskInfo.ec2Instance) + + mountDevice, alreadyAttached, err := c.getMountDevice(awsInstance, diskInfo.ec2Instance, diskInfo.disk.awsID, false) + if err != nil { + return "", err + } + + if !alreadyAttached { + klog.Warningf("DetachDisk called on non-attached disk: %s", diskName) + // TODO: Continue? Tolerate non-attached error from the AWS DetachVolume call? 
+ } + + request := ec2.DetachVolumeInput{ + InstanceId: &awsInstance.awsID, + VolumeId: diskInfo.disk.awsID.awsString(), + } + + response, err := c.ec2.DetachVolume(&request) + if err != nil { + return "", fmt.Errorf("error detaching EBS volume %q from %q: %q", diskInfo.disk.awsID, awsInstance.awsID, err) + } + + if response == nil { + return "", errors.New("no response from DetachVolume") + } + + attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "", false) + if err != nil { + return "", err + } + if da, ok := c.deviceAllocators[awsInstance.nodeName]; ok { + da.Deprioritize(mountDevice) + } + if attachment != nil { + // We expect it to be nil, it is (maybe) interesting if it is not + klog.V(2).Infof("waitForAttachmentStatus returned non-nil attachment with state=detached: %v", attachment) + } + + if mountDevice != "" { + c.endAttaching(awsInstance, diskInfo.disk.awsID, mountDevice) + // We don't check the return value - we don't really expect the attachment to have been + // in progress, though it might have been + } + + hostDevicePath := "/dev/xvd" + string(mountDevice) + return hostDevicePath, err +} + +// CreateDisk implements Volumes.CreateDisk +func (c *Cloud) CreateDisk(volumeOptions *VolumeOptions) (KubernetesVolumeID, error) { + var createType string + var iops int64 + switch volumeOptions.VolumeType { + case VolumeTypeGP2, VolumeTypeSC1, VolumeTypeST1: + createType = volumeOptions.VolumeType + + case VolumeTypeIO1: + // See http://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateVolume.html + // for IOPS constraints. AWS will throw an error if IOPS per GB gets out + // of supported bounds, no need to check it here. + createType = volumeOptions.VolumeType + iops = int64(volumeOptions.CapacityGB * volumeOptions.IOPSPerGB) + + // Cap at min/max total IOPS, AWS would throw an error if it gets too + // low/high. 
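+ // For example, a hypothetical 4 GiB io1 request at 25 IOPS/GB asks for
+ // 100 IOPS; the clamping below keeps the result within
+ // [MinTotalIOPS, MaxTotalIOPS].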
+ if iops < MinTotalIOPS {
+ iops = MinTotalIOPS
+ }
+ if iops > MaxTotalIOPS {
+ iops = MaxTotalIOPS
+ }
+
+ case "":
+ createType = DefaultVolumeType
+
+ default:
+ return KubernetesVolumeID(""), fmt.Errorf("invalid AWS VolumeType %q", volumeOptions.VolumeType)
+ }
+
+ request := &ec2.CreateVolumeInput{}
+ request.AvailabilityZone = aws.String(volumeOptions.AvailabilityZone)
+ request.Size = aws.Int64(int64(volumeOptions.CapacityGB))
+ request.VolumeType = aws.String(createType)
+ request.Encrypted = aws.Bool(volumeOptions.Encrypted)
+ if len(volumeOptions.KmsKeyID) > 0 {
+ request.KmsKeyId = aws.String(volumeOptions.KmsKeyID)
+ request.Encrypted = aws.Bool(true)
+ }
+ if iops > 0 {
+ request.Iops = aws.Int64(iops)
+ }
+
+ tags := volumeOptions.Tags
+ tags = c.tagging.buildTags(ResourceLifecycleOwned, tags)
+
+ var tagList []*ec2.Tag
+ for k, v := range tags {
+ tagList = append(tagList, &ec2.Tag{
+ Key: aws.String(k), Value: aws.String(v),
+ })
+ }
+ request.TagSpecifications = append(request.TagSpecifications, &ec2.TagSpecification{
+ Tags: tagList,
+ ResourceType: aws.String(ec2.ResourceTypeVolume),
+ })
+
+ response, err := c.ec2.CreateVolume(request)
+ if err != nil {
+ return KubernetesVolumeID(""), err
+ }
+
+ awsID := EBSVolumeID(aws.StringValue(response.VolumeId))
+ if awsID == "" {
+ return KubernetesVolumeID(""), fmt.Errorf("VolumeID was not returned by CreateVolume")
+ }
+ volumeName := KubernetesVolumeID("aws://" + aws.StringValue(response.AvailabilityZone) + "/" + string(awsID))
+
+ err = c.waitUntilVolumeAvailable(volumeName)
+ if err != nil {
+ // AWS has a bad habit of reporting success when creating a volume with
+ // encryption keys that either don't exist or have wrong permissions.
+ // Such a volume lives for a couple of seconds and is then silently deleted
+ // by AWS. There is no other check to ensure that the given KMS key is correct,
+ // because Kubernetes may have limited permissions to the key.
+ if isAWSErrorVolumeNotFound(err) {
+ err = fmt.Errorf("failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key")
+ } else {
+ // When the DescribeVolumes API fails, the plugin loses track of the volume's state;
+ // the driver should be able to clean up such volumes to make sure they are not leaked in customers' accounts
+ klog.V(5).Infof("Failed to create the volume %v due to %v. 
Will try to delete it.", volumeName, err) + awsDisk, newDiskError := newAWSDisk(c, volumeName) + if newDiskError != nil { + klog.Errorf("Failed to delete the volume %v due to error: %v", volumeName, newDiskError) + } else { + if _, deleteVolumeError := awsDisk.deleteVolume(); deleteVolumeError != nil { + klog.Errorf("Failed to delete the volume %v due to error: %v", volumeName, deleteVolumeError) + } else { + klog.V(5).Infof("%v is deleted because it is not in desired state after waiting", volumeName) + } + } + } + return KubernetesVolumeID(""), err + } + + return volumeName, nil +} + +func (c *Cloud) waitUntilVolumeAvailable(volumeName KubernetesVolumeID) error { + disk, err := newAWSDisk(c, volumeName) + if err != nil { + // Unreachable code + return err + } + time.Sleep(5 * time.Second) + backoff := wait.Backoff{ + Duration: volumeCreateInitialDelay, + Factor: volumeCreateBackoffFactor, + Steps: volumeCreateBackoffSteps, + } + err = wait.ExponentialBackoff(backoff, func() (done bool, err error) { + vol, err := disk.describeVolume() + if err != nil { + return true, err + } + if vol.State != nil { + switch *vol.State { + case "available": + // The volume is Available, it won't be deleted now. + return true, nil + case "creating": + return false, nil + default: + return true, fmt.Errorf("unexpected State of newly created AWS EBS volume %s: %q", volumeName, *vol.State) + } + } + return false, nil + }) + return err +} + +// DeleteDisk implements Volumes.DeleteDisk +func (c *Cloud) DeleteDisk(volumeName KubernetesVolumeID) (bool, error) { + awsDisk, err := newAWSDisk(c, volumeName) + if err != nil { + return false, err + } + available, err := c.checkIfAvailable(awsDisk, "deleting", "") + if err != nil { + if isAWSErrorVolumeNotFound(err) { + klog.V(2).Infof("Volume %s not found when deleting it, assuming it's deleted", awsDisk.awsID) + return false, nil + } + if volerr.IsDanglingError(err) { + // The volume is still attached somewhere + return false, volerr.NewDeletedVolumeInUseError(err.Error()) + } + klog.Error(err) + } + + if !available { + return false, err + } + + return awsDisk.deleteVolume() +} + +func (c *Cloud) checkIfAvailable(disk *awsDisk, opName string, instance string) (bool, error) { + info, err := disk.describeVolume() + + if err != nil { + klog.Errorf("Error describing volume %q: %q", disk.awsID, err) + // if for some reason we can not describe volume we will return error + return false, err + } + + volumeState := aws.StringValue(info.State) + opError := fmt.Sprintf("error %s EBS volume %q", opName, disk.awsID) + if len(instance) != 0 { + opError = fmt.Sprintf("%q to instance %q", opError, instance) + } + + // Only available volumes can be attached or deleted + if volumeState != "available" { + // Volume is attached somewhere else and we can not attach it here + if len(info.Attachments) > 0 { + attachment := info.Attachments[0] + instanceID := aws.StringValue(attachment.InstanceId) + attachedInstance, ierr := c.getInstanceByID(instanceID) + attachErr := fmt.Sprintf("%s since volume is currently attached to %q", opError, instanceID) + if ierr != nil { + klog.Error(attachErr) + return false, errors.New(attachErr) + } + devicePath := aws.StringValue(attachment.Device) + nodeName := mapInstanceToNodeName(attachedInstance) + + danglingErr := volerr.NewDanglingError(attachErr, nodeName, devicePath) + return false, danglingErr + } + + attachErr := fmt.Errorf("%s since volume is in %q state", opError, volumeState) + return false, attachErr + } + + return true, nil +} + +// 
GetLabelsForVolume gets the volume labels for a volume +func (c *Cloud) GetLabelsForVolume(ctx context.Context, pv *v1.PersistentVolume) (map[string]string, error) { + // Ignore if not AWSElasticBlockStore. + if pv.Spec.AWSElasticBlockStore == nil { + return nil, nil + } + + // Ignore any volumes that are being provisioned + if pv.Spec.AWSElasticBlockStore.VolumeID == cloudvolume.ProvisionedVolumeName { + return nil, nil + } + + spec := KubernetesVolumeID(pv.Spec.AWSElasticBlockStore.VolumeID) + labels, err := c.GetVolumeLabels(spec) + if err != nil { + return nil, err + } + + return labels, nil +} + +// GetVolumeLabels implements Volumes.GetVolumeLabels +func (c *Cloud) GetVolumeLabels(volumeName KubernetesVolumeID) (map[string]string, error) { + awsDisk, err := newAWSDisk(c, volumeName) + if err != nil { + return nil, err + } + info, err := awsDisk.describeVolume() + if err != nil { + return nil, err + } + labels := make(map[string]string) + az := aws.StringValue(info.AvailabilityZone) + if az == "" { + return nil, fmt.Errorf("volume did not have AZ information: %q", aws.StringValue(info.VolumeId)) + } + + labels[v1.LabelTopologyZone] = az + region, err := azToRegion(az) + if err != nil { + return nil, err + } + labels[v1.LabelTopologyRegion] = region + + return labels, nil +} + +// GetDiskPath implements Volumes.GetDiskPath +func (c *Cloud) GetDiskPath(volumeName KubernetesVolumeID) (string, error) { + awsDisk, err := newAWSDisk(c, volumeName) + if err != nil { + return "", err + } + info, err := awsDisk.describeVolume() + if err != nil { + return "", err + } + if len(info.Attachments) == 0 { + return "", fmt.Errorf("No attachment to volume %s", volumeName) + } + return aws.StringValue(info.Attachments[0].Device), nil +} + +// DiskIsAttached implements Volumes.DiskIsAttached +func (c *Cloud) DiskIsAttached(diskName KubernetesVolumeID, nodeName types.NodeName) (bool, error) { + _, attached, err := c.checkIfAttachedToNode(diskName, nodeName) + if err != nil { + if isAWSErrorVolumeNotFound(err) { + // The disk doesn't exist, can't be attached + klog.Warningf("DiskIsAttached called for volume %s on node %s but the volume does not exist", diskName, nodeName) + return false, nil + } + + return true, err + } + + return attached, nil +} + +// DisksAreAttached returns a map of nodes and Kubernetes volume IDs indicating +// if the volumes are attached to the node +func (c *Cloud) DisksAreAttached(nodeDisks map[types.NodeName][]KubernetesVolumeID) (map[types.NodeName]map[KubernetesVolumeID]bool, error) { + attached := make(map[types.NodeName]map[KubernetesVolumeID]bool) + + if len(nodeDisks) == 0 { + return attached, nil + } + + nodeNames := []string{} + for nodeName, diskNames := range nodeDisks { + for _, diskName := range diskNames { + setNodeDisk(attached, diskName, nodeName, false) + } + nodeNames = append(nodeNames, mapNodeNameToPrivateDNSName(nodeName)) + } + + // Note that we get instances regardless of state. + // This means there might be multiple nodes with the same node names. + awsInstances, err := c.getInstancesByNodeNames(nodeNames) + if err != nil { + // When there is an error fetching instance information + // it is safer to return nil and let volume information not be touched. 
+ return nil, err
+ }
+
+ if len(awsInstances) == 0 {
+ klog.V(2).Infof("DisksAreAttached found no instances matching node names; will assume disks not attached")
+ return attached, nil
+ }
+
+ // Note that we check that the volume is attached to the correct node, not that it is attached to _a_ node
+ for _, awsInstance := range awsInstances {
+ nodeName := mapInstanceToNodeName(awsInstance)
+
+ diskNames := nodeDisks[nodeName]
+ if len(diskNames) == 0 {
+ continue
+ }
+
+ awsInstanceState := ""
+ if awsInstance != nil && awsInstance.State != nil {
+ awsInstanceState = aws.StringValue(awsInstance.State.Name)
+ }
+ if awsInstanceState == "terminated" {
+ // Instance is terminated, safe to assume volumes not attached
+ // Note that we keep volumes attached to instances in other states (most notably, stopped)
+ continue
+ }
+
+ idToDiskName := make(map[EBSVolumeID]KubernetesVolumeID)
+ for _, diskName := range diskNames {
+ volumeID, err := diskName.MapToAWSVolumeID()
+ if err != nil {
+ return nil, fmt.Errorf("error mapping volume spec %q to aws id: %v", diskName, err)
+ }
+ idToDiskName[volumeID] = diskName
+ }
+
+ for _, blockDevice := range awsInstance.BlockDeviceMappings {
+ volumeID := EBSVolumeID(aws.StringValue(blockDevice.Ebs.VolumeId))
+ diskName, found := idToDiskName[volumeID]
+ if found {
+ // Disk is still attached to node
+ setNodeDisk(attached, diskName, nodeName, true)
+ }
+ }
+ }
+
+ return attached, nil
+}
+
+// ResizeDisk resizes an EBS volume in GiB increments; it rounds up to the
+// next GiB if the requested size is not an even GiB increment
+func (c *Cloud) ResizeDisk(
+ diskName KubernetesVolumeID,
+ oldSize resource.Quantity,
+ newSize resource.Quantity) (resource.Quantity, error) {
+ awsDisk, err := newAWSDisk(c, diskName)
+ if err != nil {
+ return oldSize, err
+ }
+
+ volumeInfo, err := awsDisk.describeVolume()
+ if err != nil {
+ descErr := fmt.Errorf("AWS.ResizeDisk Error describing volume %s with %v", diskName, err)
+ return oldSize, descErr
+ }
+ // AWS resizes in chunks of GiB (not GB)
+ requestGiB, err := volumehelpers.RoundUpToGiB(newSize)
+ if err != nil {
+ return oldSize, err
+ }
+ newSizeQuant := resource.MustParse(fmt.Sprintf("%dGi", requestGiB))
+
+ // If the disk is already of greater or equal size than requested, we return
+ if aws.Int64Value(volumeInfo.Size) >= requestGiB {
+ return newSizeQuant, nil
+ }
+ _, err = awsDisk.modifyVolume(requestGiB)
+
+ if err != nil {
+ return oldSize, err
+ }
+ return newSizeQuant, nil
+}
+
+// Gets the current load balancer state
+func (c *Cloud) describeLoadBalancer(name string) (*elb.LoadBalancerDescription, error) {
+ request := &elb.DescribeLoadBalancersInput{}
+ request.LoadBalancerNames = []*string{&name}
+
+ response, err := c.elb.DescribeLoadBalancers(request)
+ if err != nil {
+ if awsError, ok := err.(awserr.Error); ok {
+ if awsError.Code() == "LoadBalancerNotFound" {
+ return nil, nil
+ }
+ }
+ return nil, err
+ }
+
+ var ret *elb.LoadBalancerDescription
+ for _, loadBalancer := range response.LoadBalancerDescriptions {
+ if ret != nil {
+ klog.Errorf("Found multiple load balancers with name: %s", name)
+ }
+ ret = loadBalancer
+ }
+ return ret, nil
+}
+
+func (c *Cloud) addLoadBalancerTags(loadBalancerName string, requested map[string]string) error {
+ var tags []*elb.Tag
+ for k, v := range requested {
+ tag := &elb.Tag{
+ Key: aws.String(k),
+ Value: aws.String(v),
+ }
+ tags = append(tags, tag)
+ }
+
+ request := &elb.AddTagsInput{}
+ request.LoadBalancerNames = []*string{&loadBalancerName}
+ 
request.Tags = tags + + _, err := c.elb.AddTags(request) + if err != nil { + return fmt.Errorf("error adding tags to load balancer: %v", err) + } + return nil +} + +// Gets the current load balancer state +func (c *Cloud) describeLoadBalancerv2(name string) (*elbv2.LoadBalancer, error) { + request := &elbv2.DescribeLoadBalancersInput{ + Names: []*string{aws.String(name)}, + } + + response, err := c.elbv2.DescribeLoadBalancers(request) + if err != nil { + if awsError, ok := err.(awserr.Error); ok { + if awsError.Code() == elbv2.ErrCodeLoadBalancerNotFoundException { + return nil, nil + } + } + return nil, fmt.Errorf("error describing load balancer: %q", err) + } + + // AWS will not return 2 load balancers with the same name _and_ type. + for i := range response.LoadBalancers { + if aws.StringValue(response.LoadBalancers[i].Type) == elbv2.LoadBalancerTypeEnumNetwork { + return response.LoadBalancers[i], nil + } + } + + return nil, fmt.Errorf("NLB '%s' could not be found", name) +} + +// Retrieves instance's vpc id from metadata +func (c *Cloud) findVPCID() (string, error) { + macs, err := c.metadata.GetMetadata("network/interfaces/macs/") + if err != nil { + return "", fmt.Errorf("could not list interfaces of the instance: %q", err) + } + + // loop over interfaces, first vpc id returned wins + for _, macPath := range strings.Split(macs, "\n") { + if len(macPath) == 0 { + continue + } + url := fmt.Sprintf("network/interfaces/macs/%svpc-id", macPath) + vpcID, err := c.metadata.GetMetadata(url) + if err != nil { + continue + } + return vpcID, nil + } + return "", fmt.Errorf("could not find VPC ID in instance metadata") +} + +// Retrieves the specified security group from the AWS API, or returns nil if not found +func (c *Cloud) findSecurityGroup(securityGroupID string) (*ec2.SecurityGroup, error) { + describeSecurityGroupsRequest := &ec2.DescribeSecurityGroupsInput{ + GroupIds: []*string{&securityGroupID}, + } + // We don't apply our tag filters because we are retrieving by ID + + groups, err := c.ec2.DescribeSecurityGroups(describeSecurityGroupsRequest) + if err != nil { + klog.Warningf("Error retrieving security group: %q", err) + return nil, err + } + + if len(groups) == 0 { + return nil, nil + } + if len(groups) != 1 { + // This should not be possible - ids should be unique + return nil, fmt.Errorf("multiple security groups found with same id %q", securityGroupID) + } + group := groups[0] + return group, nil +} + +func isEqualIntPointer(l, r *int64) bool { + if l == nil { + return r == nil + } + if r == nil { + return l == nil + } + return *l == *r +} + +func isEqualStringPointer(l, r *string) bool { + if l == nil { + return r == nil + } + if r == nil { + return l == nil + } + return *l == *r +} + +func ipPermissionExists(newPermission, existing *ec2.IpPermission, compareGroupUserIDs bool) bool { + if !isEqualIntPointer(newPermission.FromPort, existing.FromPort) { + return false + } + if !isEqualIntPointer(newPermission.ToPort, existing.ToPort) { + return false + } + if !isEqualStringPointer(newPermission.IpProtocol, existing.IpProtocol) { + return false + } + // Check only if newPermission is a subset of existing. Usually it has zero or one elements. + // Not doing actual CIDR math yet; not clear it's needed, either. 
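+ // Illustration: a newPermission with IpRanges=["10.0.0.0/8"] matches an
+ // existing rule with IpRanges=["10.0.0.0/8", "192.168.0.0/16"], but not the
+ // other way around; and since this is string comparison, equivalent but
+ // differently written CIDRs would not match.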
+ klog.V(4).Infof("Comparing %v to %v", newPermission, existing) + if len(newPermission.IpRanges) > len(existing.IpRanges) { + return false + } + + for j := range newPermission.IpRanges { + found := false + for k := range existing.IpRanges { + if isEqualStringPointer(newPermission.IpRanges[j].CidrIp, existing.IpRanges[k].CidrIp) { + found = true + break + } + } + if !found { + return false + } + } + + for _, leftPair := range newPermission.UserIdGroupPairs { + found := false + for _, rightPair := range existing.UserIdGroupPairs { + if isEqualUserGroupPair(leftPair, rightPair, compareGroupUserIDs) { + found = true + break + } + } + if !found { + return false + } + } + + return true +} + +func isEqualUserGroupPair(l, r *ec2.UserIdGroupPair, compareGroupUserIDs bool) bool { + klog.V(2).Infof("Comparing %v to %v", *l.GroupId, *r.GroupId) + if isEqualStringPointer(l.GroupId, r.GroupId) { + if compareGroupUserIDs { + if isEqualStringPointer(l.UserId, r.UserId) { + return true + } + } else { + return true + } + } + + return false +} + +// Makes sure the security group ingress is exactly the specified permissions +// Returns true if and only if changes were made +// The security group must already exist +func (c *Cloud) setSecurityGroupIngress(securityGroupID string, permissions IPPermissionSet) (bool, error) { + group, err := c.findSecurityGroup(securityGroupID) + if err != nil { + klog.Warningf("Error retrieving security group %q", err) + return false, err + } + + if group == nil { + return false, fmt.Errorf("security group not found: %s", securityGroupID) + } + + klog.V(2).Infof("Existing security group ingress: %s %v", securityGroupID, group.IpPermissions) + + actual := NewIPPermissionSet(group.IpPermissions...) + + // EC2 groups rules together, for example combining: + // + // { Port=80, Range=[A] } and { Port=80, Range=[B] } + // + // into { Port=80, Range=[A,B] } + // + // We have to ungroup them, because otherwise the logic becomes really + // complicated, and also because if we have Range=[A,B] and we try to + // add Range=[A] then EC2 complains about a duplicate rule. + permissions = permissions.Ungroup() + actual = actual.Ungroup() + + remove := actual.Difference(permissions) + add := permissions.Difference(actual) + + if add.Len() == 0 && remove.Len() == 0 { + return false, nil + } + + // TODO: There is a limit in VPC of 100 rules per security group, so we + // probably should try grouping or combining to fit under this limit. + // But this is only used on the ELB security group currently, so it + // would require (ports * CIDRS) > 100. Also, it isn't obvious exactly + // how removing single permissions from compound rules works, and we + // don't want to accidentally open more than intended while we're + // applying changes. 
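+ // Worked example (hypothetical rules): desired {Port=80, Ranges=[A,B]} vs
+ // actual {Port=80, Ranges=[A]} ungroups to {80/A, 80/B} vs {80/A}, giving
+ // add={80/B} and remove={}; only the missing single-range rule is
+ // authorized below.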
+ if add.Len() != 0 { + klog.V(2).Infof("Adding security group ingress: %s %v", securityGroupID, add.List()) + + request := &ec2.AuthorizeSecurityGroupIngressInput{} + request.GroupId = &securityGroupID + request.IpPermissions = add.List() + _, err = c.ec2.AuthorizeSecurityGroupIngress(request) + if err != nil { + return false, fmt.Errorf("error authorizing security group ingress: %q", err) + } + } + if remove.Len() != 0 { + klog.V(2).Infof("Remove security group ingress: %s %v", securityGroupID, remove.List()) + + request := &ec2.RevokeSecurityGroupIngressInput{} + request.GroupId = &securityGroupID + request.IpPermissions = remove.List() + _, err = c.ec2.RevokeSecurityGroupIngress(request) + if err != nil { + return false, fmt.Errorf("error revoking security group ingress: %q", err) + } + } + + return true, nil +} + +// Makes sure the security group includes the specified permissions +// Returns true if and only if changes were made +// The security group must already exist +func (c *Cloud) addSecurityGroupIngress(securityGroupID string, addPermissions []*ec2.IpPermission) (bool, error) { + // We do not want to make changes to the Global defined SG + if securityGroupID == c.cfg.Global.ElbSecurityGroup { + return false, nil + } + + group, err := c.findSecurityGroup(securityGroupID) + if err != nil { + klog.Warningf("Error retrieving security group: %q", err) + return false, err + } + + if group == nil { + return false, fmt.Errorf("security group not found: %s", securityGroupID) + } + + klog.V(2).Infof("Existing security group ingress: %s %v", securityGroupID, group.IpPermissions) + + changes := []*ec2.IpPermission{} + for _, addPermission := range addPermissions { + hasUserID := false + for i := range addPermission.UserIdGroupPairs { + if addPermission.UserIdGroupPairs[i].UserId != nil { + hasUserID = true + } + } + + found := false + for _, groupPermission := range group.IpPermissions { + if ipPermissionExists(addPermission, groupPermission, hasUserID) { + found = true + break + } + } + + if !found { + changes = append(changes, addPermission) + } + } + + if len(changes) == 0 { + return false, nil + } + + klog.V(2).Infof("Adding security group ingress: %s %v", securityGroupID, changes) + + request := &ec2.AuthorizeSecurityGroupIngressInput{} + request.GroupId = &securityGroupID + request.IpPermissions = changes + _, err = c.ec2.AuthorizeSecurityGroupIngress(request) + if err != nil { + klog.Warningf("Error authorizing security group ingress %q", err) + return false, fmt.Errorf("error authorizing security group ingress: %q", err) + } + + return true, nil +} + +// Makes sure the security group no longer includes the specified permissions +// Returns true if and only if changes were made +// If the security group no longer exists, will return (false, nil) +func (c *Cloud) removeSecurityGroupIngress(securityGroupID string, removePermissions []*ec2.IpPermission) (bool, error) { + // We do not want to make changes to the Global defined SG + if securityGroupID == c.cfg.Global.ElbSecurityGroup { + return false, nil + } + + group, err := c.findSecurityGroup(securityGroupID) + if err != nil { + klog.Warningf("Error retrieving security group: %q", err) + return false, err + } + + if group == nil { + klog.Warning("Security group not found: ", securityGroupID) + return false, nil + } + + changes := []*ec2.IpPermission{} + for _, removePermission := range removePermissions { + hasUserID := false + for i := range removePermission.UserIdGroupPairs { + if removePermission.UserIdGroupPairs[i].UserId != nil 
{ + hasUserID = true + } + } + + var found *ec2.IpPermission + for _, groupPermission := range group.IpPermissions { + if ipPermissionExists(removePermission, groupPermission, hasUserID) { + found = removePermission + break + } + } + + if found != nil { + changes = append(changes, found) + } + } + + if len(changes) == 0 { + return false, nil + } + + klog.V(2).Infof("Removing security group ingress: %s %v", securityGroupID, changes) + + request := &ec2.RevokeSecurityGroupIngressInput{} + request.GroupId = &securityGroupID + request.IpPermissions = changes + _, err = c.ec2.RevokeSecurityGroupIngress(request) + if err != nil { + klog.Warningf("Error revoking security group ingress: %q", err) + return false, err + } + + return true, nil +} + +// Makes sure the security group exists. +// For multi-cluster isolation, name must be globally unique, for example derived from the service UUID. +// Additional tags can be specified +// Returns the security group id or error +func (c *Cloud) ensureSecurityGroup(name string, description string, additionalTags map[string]string) (string, error) { + groupID := "" + attempt := 0 + for { + attempt++ + + // Note that we do _not_ add our tag filters; group-name + vpc-id is the EC2 primary key. + // However, we do check that it matches our tags. + // If it doesn't have any tags, we tag it; this is how we recover if we failed to tag before. + // If it has a different cluster's tags, that is an error. + // This shouldn't happen because name is expected to be globally unique (UUID derived) + request := &ec2.DescribeSecurityGroupsInput{} + request.Filters = []*ec2.Filter{ + newEc2Filter("group-name", name), + newEc2Filter("vpc-id", c.vpcID), + } + + securityGroups, err := c.ec2.DescribeSecurityGroups(request) + if err != nil { + return "", err + } + + if len(securityGroups) >= 1 { + if len(securityGroups) > 1 { + klog.Warningf("Found multiple security groups with name: %q", name) + } + err := c.tagging.readRepairClusterTags( + c.ec2, aws.StringValue(securityGroups[0].GroupId), + ResourceLifecycleOwned, nil, securityGroups[0].Tags) + if err != nil { + return "", err + } + + return aws.StringValue(securityGroups[0].GroupId), nil + } + + createRequest := &ec2.CreateSecurityGroupInput{} + createRequest.VpcId = &c.vpcID + createRequest.GroupName = &name + createRequest.Description = &description + tags := c.tagging.buildTags(ResourceLifecycleOwned, additionalTags) + var awsTags []*ec2.Tag + for k, v := range tags { + tag := &ec2.Tag{ + Key: aws.String(k), + Value: aws.String(v), + } + awsTags = append(awsTags, tag) + } + createRequest.TagSpecifications = []*ec2.TagSpecification{ + { + ResourceType: aws.String(ec2.ResourceTypeSecurityGroup), + Tags: awsTags, + }, + } + + createResponse, err := c.ec2.CreateSecurityGroup(createRequest) + if err != nil { + ignore := false + switch err := err.(type) { + case awserr.Error: + if err.Code() == "InvalidGroup.Duplicate" && attempt < MaxReadThenCreateRetries { + klog.V(2).Infof("Got InvalidGroup.Duplicate while creating security group (race?); will retry") + ignore = true + } + } + if !ignore { + klog.Errorf("Error creating security group: %q", err) + return "", err + } + time.Sleep(1 * time.Second) + } else { + groupID = aws.StringValue(createResponse.GroupId) + break + } + } + if groupID == "" { + return "", fmt.Errorf("created security group, but id was not returned: %s", name) + } + + return groupID, nil +} + +// Finds the value for a given tag. 
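+// For example (illustrative): given tags [{Key: "Name", Value: "node-1"}],
+// findTag(tags, "Name") returns ("node-1", true), while findTag(tags, "Role")
+// returns ("", false).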
+func findTag(tags []*ec2.Tag, key string) (string, bool) { + for _, tag := range tags { + if aws.StringValue(tag.Key) == key { + return aws.StringValue(tag.Value), true + } + } + return "", false +} + +// Finds the subnets associated with the cluster, by matching cluster tags if present. +// For maximal backwards compatibility, if no subnets are tagged, it will fall-back to the current subnet. +// However, in future this will likely be treated as an error. +func (c *Cloud) findSubnets() ([]*ec2.Subnet, error) { + request := &ec2.DescribeSubnetsInput{} + request.Filters = []*ec2.Filter{newEc2Filter("vpc-id", c.vpcID)} + + subnets, err := c.ec2.DescribeSubnets(request) + if err != nil { + return nil, fmt.Errorf("error describing subnets: %q", err) + } + + var matches []*ec2.Subnet + for _, subnet := range subnets { + if c.tagging.hasClusterTag(subnet.Tags) { + matches = append(matches, subnet) + } else if c.tagging.hasNoClusterPrefixTag(subnet.Tags) { + matches = append(matches, subnet) + } + } + + if len(matches) != 0 { + return matches, nil + } + + // Fall back to the current instance subnets, if nothing is tagged + klog.Warningf("No tagged subnets found; will fall-back to the current subnet only. This is likely to be an error in a future version of k8s.") + + request = &ec2.DescribeSubnetsInput{} + request.Filters = []*ec2.Filter{newEc2Filter("subnet-id", c.selfAWSInstance.subnetID)} + + subnets, err = c.ec2.DescribeSubnets(request) + if err != nil { + return nil, fmt.Errorf("error describing subnets: %q", err) + } + + return subnets, nil +} + +// Finds the subnets to use for an ELB we are creating. +// Normal (Internet-facing) ELBs must use public subnets, so we skip private subnets. +// Internal ELBs can use public or private subnets, but if we have a private subnet we should prefer that. 
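+// Illustration (hypothetical subnets, hedged sketch): with subnet-pub1
+// (us-east-1a, public) and subnet-priv1 (us-east-1a, private), an
+// internet-facing ELB skips subnet-priv1 and picks subnet-pub1; an internal
+// ELB considers both and breaks the tie via the role tag, then the cluster
+// tag, and finally lexicographic order of subnet IDs.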
+func (c *Cloud) findELBSubnets(internalELB bool) ([]string, error) { + vpcIDFilter := newEc2Filter("vpc-id", c.vpcID) + + subnets, err := c.findSubnets() + if err != nil { + return nil, err + } + + rRequest := &ec2.DescribeRouteTablesInput{} + rRequest.Filters = []*ec2.Filter{vpcIDFilter} + rt, err := c.ec2.DescribeRouteTables(rRequest) + if err != nil { + return nil, fmt.Errorf("error describe route table: %q", err) + } + + subnetsByAZ := make(map[string]*ec2.Subnet) + for _, subnet := range subnets { + az := aws.StringValue(subnet.AvailabilityZone) + id := aws.StringValue(subnet.SubnetId) + if az == "" || id == "" { + klog.Warningf("Ignoring subnet with empty az/id: %v", subnet) + continue + } + + isPublic, err := isSubnetPublic(rt, id) + if err != nil { + return nil, err + } + if !internalELB && !isPublic { + klog.V(2).Infof("Ignoring private subnet for public ELB %q", id) + continue + } + + existing := subnetsByAZ[az] + if existing == nil { + subnetsByAZ[az] = subnet + continue + } + + // Try to break the tie using the role tag + var tagName string + if internalELB { + tagName = TagNameSubnetInternalELB + } else { + tagName = TagNameSubnetPublicELB + } + + _, existingHasTag := findTag(existing.Tags, tagName) + _, subnetHasTag := findTag(subnet.Tags, tagName) + + if existingHasTag != subnetHasTag { + if subnetHasTag { + subnetsByAZ[az] = subnet + } + continue + } + + // Prefer the one with the cluster Tag + existingHasClusterTag := c.tagging.hasClusterTag(existing.Tags) + subnetHasClusterTag := c.tagging.hasClusterTag(subnet.Tags) + if existingHasClusterTag != subnetHasClusterTag { + if subnetHasClusterTag { + subnetsByAZ[az] = subnet + } + continue + } + + // If we have two subnets for the same AZ we arbitrarily choose the one that is first lexicographically. 
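+ // e.g. between hypothetical IDs subnet-0abc and subnet-0def, subnet-0abc is
+ // kept, since strings.Compare("subnet-0abc", "subnet-0def") < 0.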
+ if strings.Compare(*existing.SubnetId, *subnet.SubnetId) > 0 { + klog.Warningf("Found multiple subnets in AZ %q; choosing %q between subnets %q and %q", az, *subnet.SubnetId, *existing.SubnetId, *subnet.SubnetId) + subnetsByAZ[az] = subnet + continue + } + + klog.Warningf("Found multiple subnets in AZ %q; choosing %q between subnets %q and %q", az, *existing.SubnetId, *existing.SubnetId, *subnet.SubnetId) + continue + } + + var azNames []string + for key := range subnetsByAZ { + azNames = append(azNames, key) + } + + sort.Strings(azNames) + + var subnetIDs []string + for _, key := range azNames { + subnetIDs = append(subnetIDs, aws.StringValue(subnetsByAZ[key].SubnetId)) + } + + return subnetIDs, nil +} + +func splitCommaSeparatedString(commaSeparatedString string) []string { + var result []string + parts := strings.Split(commaSeparatedString, ",") + for _, part := range parts { + part = strings.TrimSpace(part) + if len(part) == 0 { + continue + } + result = append(result, part) + } + return result +} + +// parses comma separated values from annotation into string slice, returns true if annotation exists +func parseStringSliceAnnotation(annotations map[string]string, annotation string, value *[]string) bool { + rawValue := "" + if exists := parseStringAnnotation(annotations, annotation, &rawValue); !exists { + return false + } + *value = splitCommaSeparatedString(rawValue) + return true +} + +func (c *Cloud) getLoadBalancerSubnets(service *v1.Service, internalELB bool) ([]string, error) { + var rawSubnetNameOrIDs []string + if exists := parseStringSliceAnnotation(service.Annotations, ServiceAnnotationLoadBalancerSubnets, &rawSubnetNameOrIDs); exists { + return c.resolveSubnetNameOrIDs(rawSubnetNameOrIDs) + } + return c.findELBSubnets(internalELB) +} + +func (c *Cloud) resolveSubnetNameOrIDs(subnetNameOrIDs []string) ([]string, error) { + var subnetIDs []string + var subnetNames []string + if len(subnetNameOrIDs) == 0 { + return []string{}, fmt.Errorf("unable to resolve empty subnet slice") + } + for _, nameOrID := range subnetNameOrIDs { + if strings.HasPrefix(nameOrID, "subnet-") { + subnetIDs = append(subnetIDs, nameOrID) + } else { + subnetNames = append(subnetNames, nameOrID) + } + } + var resolvedSubnets []*ec2.Subnet + if len(subnetIDs) > 0 { + req := &ec2.DescribeSubnetsInput{ + SubnetIds: aws.StringSlice(subnetIDs), + } + subnets, err := c.ec2.DescribeSubnets(req) + if err != nil { + return []string{}, err + } + resolvedSubnets = append(resolvedSubnets, subnets...) + } + if len(subnetNames) > 0 { + req := &ec2.DescribeSubnetsInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("tag:Name"), + Values: aws.StringSlice(subnetNames), + }, + { + Name: aws.String("vpc-id"), + Values: aws.StringSlice([]string{c.vpcID}), + }, + }, + } + subnets, err := c.ec2.DescribeSubnets(req) + if err != nil { + return []string{}, err + } + resolvedSubnets = append(resolvedSubnets, subnets...) 
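+ // Illustration (hypothetical input): for ["subnet-0123", "my-subnet"], the
+ // two lookups above resolve IDs and Name tags separately; the length check
+ // below then rejects the request if "my-subnet" matched nothing.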
+ }
+ if len(resolvedSubnets) != len(subnetNameOrIDs) {
+ return []string{}, fmt.Errorf("expected to find %v, but found %v subnets", len(subnetNameOrIDs), len(resolvedSubnets))
+ }
+ var subnets []string
+ for _, subnet := range resolvedSubnets {
+ subnets = append(subnets, aws.StringValue(subnet.SubnetId))
+ }
+ return subnets, nil
+}
+
+func isSubnetPublic(rt []*ec2.RouteTable, subnetID string) (bool, error) {
+ var subnetTable *ec2.RouteTable
+ for _, table := range rt {
+ for _, assoc := range table.Associations {
+ if aws.StringValue(assoc.SubnetId) == subnetID {
+ subnetTable = table
+ break
+ }
+ }
+ }
+
+ if subnetTable == nil {
+ // If there is no explicit association, the subnet will be implicitly
+ // associated with the VPC's main routing table.
+ for _, table := range rt {
+ for _, assoc := range table.Associations {
+ if aws.BoolValue(assoc.Main) == true {
+ klog.V(4).Infof("Assuming implicit use of main routing table %s for %s",
+ aws.StringValue(table.RouteTableId), subnetID)
+ subnetTable = table
+ break
+ }
+ }
+ }
+ }
+
+ if subnetTable == nil {
+ return false, fmt.Errorf("could not locate routing table for subnet %s", subnetID)
+ }
+
+ for _, route := range subnetTable.Routes {
+ // There is no direct way in the AWS API to determine if a subnet is public or private.
+ // A public subnet is one which has an internet gateway route;
+ // we look for the gatewayId and make sure it has the prefix of igw to differentiate
+ // from the default in-subnet route which is called "local",
+ // other virtual gateways (starting with vgw),
+ // or vpc peering connections (starting with pcx).
+ if strings.HasPrefix(aws.StringValue(route.GatewayId), "igw") {
+ return true, nil
+ }
+ }
+
+ return false, nil
+}
+
+type portSets struct {
+ names sets.String
+ numbers sets.Int64
+}
+
+// getPortSets returns a portSets structure representing port names and numbers
+// that the comma-separated string describes. If the input is empty or equal to
+// "*", a nil pointer is returned.
+func getPortSets(annotation string) (ports *portSets) {
+ if annotation != "" && annotation != "*" {
+ ports = &portSets{
+ sets.NewString(),
+ sets.NewInt64(),
+ }
+ portStringSlice := strings.Split(annotation, ",")
+ for _, item := range portStringSlice {
+ port, err := strconv.Atoi(item)
+ if err != nil {
+ ports.names.Insert(item)
+ } else {
+ ports.numbers.Insert(int64(port))
+ }
+ }
+ }
+ return
+}
+
+// This function is useful in extracting the security group list from the annotation
+func getSGListFromAnnotation(annotatedSG string) []string {
+ sgList := []string{}
+ for _, extraSG := range strings.Split(annotatedSG, ",") {
+ extraSG = strings.TrimSpace(extraSG)
+ if len(extraSG) > 0 {
+ sgList = append(sgList, extraSG)
+ }
+ }
+ return sgList
+}
+
+// buildELBSecurityGroupList returns the list of SecurityGroups which should be
+// attached to the ELB created by a service. The list always consists of at least
+// one member, which is either an SG created for this service or an SG from the Global config.
+// Extra groups can be specified via annotation, as can extra tags for any
+// new groups. The annotation "ServiceAnnotationLoadBalancerSecurityGroups" allows
+// specifying the security groups to use.
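+// Hedged illustration (hypothetical IDs): with neither annotation set and no
+// global ElbSecurityGroup configured, the result is a single freshly created
+// group, e.g. (["sg-0new"], setupSg=true); with the security-groups
+// annotation "sg-a,sg-b", the result is (["sg-a", "sg-b"], setupSg=false),
+// since user-supplied groups are left to the user to manage.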
+func (c *Cloud) buildELBSecurityGroupList(serviceName types.NamespacedName, loadBalancerName string, annotations map[string]string) ([]string, bool, error) { + var err error + var securityGroupID string + // We do not want to make changes to a Global defined SG + var setupSg = false + + sgList := getSGListFromAnnotation(annotations[ServiceAnnotationLoadBalancerSecurityGroups]) + + // If no Security Groups have been specified with the ServiceAnnotationLoadBalancerSecurityGroups annotation, we add the default one. + if len(sgList) == 0 { + if c.cfg.Global.ElbSecurityGroup != "" { + sgList = append(sgList, c.cfg.Global.ElbSecurityGroup) + } else { + // Create a security group for the load balancer + sgName := "k8s-elb-" + loadBalancerName + sgDescription := fmt.Sprintf("Security group for Kubernetes ELB %s (%v)", loadBalancerName, serviceName) + securityGroupID, err = c.ensureSecurityGroup(sgName, sgDescription, getKeyValuePropertiesFromAnnotation(annotations, ServiceAnnotationLoadBalancerAdditionalTags)) + if err != nil { + klog.Errorf("Error creating load balancer security group: %q", err) + return nil, setupSg, err + } + sgList = append(sgList, securityGroupID) + setupSg = true + } + } + + extraSGList := getSGListFromAnnotation(annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups]) + sgList = append(sgList, extraSGList...) + + return sgList, setupSg, nil +} + +// sortELBSecurityGroupList returns a list of sorted securityGroupIDs based on the original order +// from buildELBSecurityGroupList. The logic is: +// * securityGroups specified by ServiceAnnotationLoadBalancerSecurityGroups appears first in order +// * securityGroups specified by ServiceAnnotationLoadBalancerExtraSecurityGroups appears last in order +func (c *Cloud) sortELBSecurityGroupList(securityGroupIDs []string, annotations map[string]string) { + annotatedSGList := getSGListFromAnnotation(annotations[ServiceAnnotationLoadBalancerSecurityGroups]) + annotatedExtraSGList := getSGListFromAnnotation(annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups]) + annotatedSGIndex := make(map[string]int, len(annotatedSGList)) + annotatedExtraSGIndex := make(map[string]int, len(annotatedExtraSGList)) + + for i, sgID := range annotatedSGList { + annotatedSGIndex[sgID] = i + } + for i, sgID := range annotatedExtraSGList { + annotatedExtraSGIndex[sgID] = i + } + sgOrderMapping := make(map[string]int, len(securityGroupIDs)) + for _, sgID := range securityGroupIDs { + if i, ok := annotatedSGIndex[sgID]; ok { + sgOrderMapping[sgID] = i + } else if j, ok := annotatedExtraSGIndex[sgID]; ok { + sgOrderMapping[sgID] = len(annotatedSGIndex) + 1 + j + } else { + sgOrderMapping[sgID] = len(annotatedSGIndex) + } + } + sort.Slice(securityGroupIDs, func(i, j int) bool { + return sgOrderMapping[securityGroupIDs[i]] < sgOrderMapping[securityGroupIDs[j]] + }) +} + +// buildListener creates a new listener from the given port, adding an SSL certificate +// if indicated by the appropriate annotations. 
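+// Hedged illustration (assumed annotation values): for port 443 with the
+// certificate annotation set and no backend-protocol annotation, the result
+// is an "ssl" listener in front of a "tcp" instance protocol; with backend
+// protocol "https" it becomes "https" in front of "https" via
+// backendProtocolMapping.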
+func buildListener(port v1.ServicePort, annotations map[string]string, sslPorts *portSets) (*elb.Listener, error) { + loadBalancerPort := int64(port.Port) + portName := strings.ToLower(port.Name) + instancePort := int64(port.NodePort) + protocol := strings.ToLower(string(port.Protocol)) + instanceProtocol := protocol + + listener := &elb.Listener{} + listener.InstancePort = &instancePort + listener.LoadBalancerPort = &loadBalancerPort + certID := annotations[ServiceAnnotationLoadBalancerCertificate] + if certID != "" && (sslPorts == nil || sslPorts.numbers.Has(loadBalancerPort) || sslPorts.names.Has(portName)) { + instanceProtocol = annotations[ServiceAnnotationLoadBalancerBEProtocol] + if instanceProtocol == "" { + protocol = "ssl" + instanceProtocol = "tcp" + } else { + protocol = backendProtocolMapping[instanceProtocol] + if protocol == "" { + return nil, fmt.Errorf("Invalid backend protocol %s for %s in %s", instanceProtocol, certID, ServiceAnnotationLoadBalancerBEProtocol) + } + } + listener.SSLCertificateId = &certID + } else if annotationProtocol := annotations[ServiceAnnotationLoadBalancerBEProtocol]; annotationProtocol == "http" { + instanceProtocol = annotationProtocol + protocol = "http" + } + + listener.Protocol = &protocol + listener.InstanceProtocol = &instanceProtocol + + return listener, nil +} + +func (c *Cloud) getSubnetCidrs(subnetIDs []string) ([]string, error) { + request := &ec2.DescribeSubnetsInput{} + for _, subnetID := range subnetIDs { + request.SubnetIds = append(request.SubnetIds, aws.String(subnetID)) + } + + subnets, err := c.ec2.DescribeSubnets(request) + if err != nil { + return nil, fmt.Errorf("error querying Subnet for ELB: %q", err) + } + if len(subnets) != len(subnetIDs) { + return nil, fmt.Errorf("error querying Subnet for ELB, got %d subnets for %v", len(subnets), subnetIDs) + } + + cidrs := make([]string, 0, len(subnets)) + for _, subnet := range subnets { + cidrs = append(cidrs, aws.StringValue(subnet.CidrBlock)) + } + return cidrs, nil +} + +func parseStringAnnotation(annotations map[string]string, annotation string, value *string) bool { + if v, ok := annotations[annotation]; ok { + *value = v + return true + } + return false +} + +func parseInt64Annotation(annotations map[string]string, annotation string, value *int64) (bool, error) { + if v, ok := annotations[annotation]; ok { + parsed, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return true, fmt.Errorf("failed to parse annotation %v=%v", annotation, v) + } + *value = parsed + return true, nil + } + return false, nil +} + +func (c *Cloud) buildNLBHealthCheckConfiguration(svc *v1.Service) (healthCheckConfig, error) { + hc := healthCheckConfig{ + Port: defaultHealthCheckPort, + Path: defaultHealthCheckPath, + Protocol: elbv2.ProtocolEnumTcp, + Interval: defaultNlbHealthCheckInterval, + Timeout: defaultNlbHealthCheckTimeout, + HealthyThreshold: defaultNlbHealthCheckThreshold, + UnhealthyThreshold: defaultNlbHealthCheckThreshold, + } + if svc.Spec.ExternalTrafficPolicy == v1.ServiceExternalTrafficPolicyTypeLocal { + path, port := servicehelpers.GetServiceHealthCheckPathPort(svc) + hc = healthCheckConfig{ + Port: strconv.Itoa(int(port)), + Path: path, + Protocol: elbv2.ProtocolEnumHttp, + Interval: 10, + Timeout: 10, + HealthyThreshold: 2, + UnhealthyThreshold: 2, + } + } + if parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckProtocol, &hc.Protocol) { + hc.Protocol = strings.ToUpper(hc.Protocol) + } + switch hc.Protocol { + case elbv2.ProtocolEnumHttp, 
elbv2.ProtocolEnumHttps: + parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckPath, &hc.Path) + case elbv2.ProtocolEnumTcp: + hc.Path = "" + default: + return healthCheckConfig{}, fmt.Errorf("Unsupported health check protocol %v", hc.Protocol) + } + + parseStringAnnotation(svc.Annotations, ServiceAnnotationLoadBalancerHealthCheckPort, &hc.Port) + + if _, err := parseInt64Annotation(svc.Annotations, ServiceAnnotationLoadBalancerHCInterval, &hc.Interval); err != nil { + return healthCheckConfig{}, err + } + if _, err := parseInt64Annotation(svc.Annotations, ServiceAnnotationLoadBalancerHCTimeout, &hc.Timeout); err != nil { + return healthCheckConfig{}, err + } + if _, err := parseInt64Annotation(svc.Annotations, ServiceAnnotationLoadBalancerHCHealthyThreshold, &hc.HealthyThreshold); err != nil { + return healthCheckConfig{}, err + } + if _, err := parseInt64Annotation(svc.Annotations, ServiceAnnotationLoadBalancerHCUnhealthyThreshold, &hc.UnhealthyThreshold); err != nil { + return healthCheckConfig{}, err + } + + if hc.Port != defaultHealthCheckPort { + if _, err := strconv.ParseInt(hc.Port, 10, 0); err != nil { + return healthCheckConfig{}, fmt.Errorf("Invalid health check port '%v'", hc.Port) + } + } + return hc, nil +} + +// EnsureLoadBalancer implements LoadBalancer.EnsureLoadBalancer +func (c *Cloud) EnsureLoadBalancer(ctx context.Context, clusterName string, apiService *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) { + annotations := apiService.Annotations + if isLBExternal(annotations) { + return nil, cloudprovider.ImplementedElsewhere + } + klog.V(2).Infof("EnsureLoadBalancer(%v, %v, %v, %v, %v, %v, %v)", + clusterName, apiService.Namespace, apiService.Name, c.region, apiService.Spec.LoadBalancerIP, apiService.Spec.Ports, annotations) + + if apiService.Spec.SessionAffinity != v1.ServiceAffinityNone { + // ELB supports sticky sessions, but only when configured for HTTP/HTTPS + return nil, fmt.Errorf("unsupported load balancer affinity: %v", apiService.Spec.SessionAffinity) + } + + if len(apiService.Spec.Ports) == 0 { + return nil, fmt.Errorf("requested load balancer with no ports") + } + if err := checkMixedProtocol(apiService.Spec.Ports); err != nil { + return nil, err + } + // Figure out what mappings we want on the load balancer + listeners := []*elb.Listener{} + v2Mappings := []nlbPortMapping{} + + sslPorts := getPortSets(annotations[ServiceAnnotationLoadBalancerSSLPorts]) + for _, port := range apiService.Spec.Ports { + if err := checkProtocol(port, annotations); err != nil { + return nil, err + } + + if port.NodePort == 0 { + klog.Errorf("Ignoring port without NodePort defined: %v", port) + continue + } + + if isNLB(annotations) { + portMapping := nlbPortMapping{ + FrontendPort: int64(port.Port), + FrontendProtocol: string(port.Protocol), + TrafficPort: int64(port.NodePort), + TrafficProtocol: string(port.Protocol), + } + var err error + if portMapping.HealthCheckConfig, err = c.buildNLBHealthCheckConfiguration(apiService); err != nil { + return nil, err + } + + certificateARN := annotations[ServiceAnnotationLoadBalancerCertificate] + if port.Protocol != v1.ProtocolUDP && certificateARN != "" && (sslPorts == nil || sslPorts.numbers.Has(int64(port.Port)) || sslPorts.names.Has(port.Name)) { + portMapping.FrontendProtocol = elbv2.ProtocolEnumTls + portMapping.SSLCertificateARN = certificateARN + portMapping.SSLPolicy = annotations[ServiceAnnotationLoadBalancerSSLNegotiationPolicy] + + if backendProtocol := 
annotations[ServiceAnnotationLoadBalancerBEProtocol]; backendProtocol == "ssl" { + portMapping.TrafficProtocol = elbv2.ProtocolEnumTls + } + } + + v2Mappings = append(v2Mappings, portMapping) + } else { + listener, err := buildListener(port, annotations, sslPorts) + if err != nil { + return nil, err + } + listeners = append(listeners, listener) + } + } + + if apiService.Spec.LoadBalancerIP != "" { + return nil, fmt.Errorf("LoadBalancerIP cannot be specified for AWS ELB") + } + + instances, err := c.findInstancesForELB(nodes, annotations) + if err != nil { + return nil, err + } + + sourceRanges, err := servicehelpers.GetLoadBalancerSourceRanges(apiService) + if err != nil { + return nil, err + } + + // Determine if this is tagged as an Internal ELB + internalELB := false + internalAnnotation := apiService.Annotations[ServiceAnnotationLoadBalancerInternal] + if internalAnnotation == "false" { + internalELB = false + } else if internalAnnotation != "" { + internalELB = true + } + + if isNLB(annotations) { + // Find the subnets that the ELB will live in + subnetIDs, err := c.getLoadBalancerSubnets(apiService, internalELB) + if err != nil { + klog.Errorf("Error listing subnets in VPC: %q", err) + return nil, err + } + // Bail out early if there are no subnets + if len(subnetIDs) == 0 { + return nil, fmt.Errorf("could not find any suitable subnets for creating the ELB") + } + + loadBalancerName := c.GetLoadBalancerName(ctx, clusterName, apiService) + serviceName := types.NamespacedName{Namespace: apiService.Namespace, Name: apiService.Name} + + instanceIDs := []string{} + for id := range instances { + instanceIDs = append(instanceIDs, string(id)) + } + + v2LoadBalancer, err := c.ensureLoadBalancerv2( + serviceName, + loadBalancerName, + v2Mappings, + instanceIDs, + subnetIDs, + internalELB, + annotations, + ) + if err != nil { + return nil, err + } + + subnetCidrs, err := c.getSubnetCidrs(subnetIDs) + if err != nil { + klog.Errorf("Error getting subnet cidrs: %q", err) + return nil, err + } + + sourceRangeCidrs := []string{} + for cidr := range sourceRanges { + sourceRangeCidrs = append(sourceRangeCidrs, cidr) + } + if len(sourceRangeCidrs) == 0 { + sourceRangeCidrs = append(sourceRangeCidrs, "0.0.0.0/0") + } + + err = c.updateInstanceSecurityGroupsForNLB(loadBalancerName, instances, subnetCidrs, sourceRangeCidrs, v2Mappings) + if err != nil { + klog.Warningf("Error opening ingress rules for the load balancer to the instances: %q", err) + return nil, err + } + + // We don't have an `ensureLoadBalancerInstances()` function for elbv2 + // because `ensureLoadBalancerv2()` requires instance Ids + + // TODO: Wait for creation? + return v2toStatus(v2LoadBalancer), nil + } + + // Determine if we need to set the Proxy protocol policy + proxyProtocol := false + proxyProtocolAnnotation := apiService.Annotations[ServiceAnnotationLoadBalancerProxyProtocol] + if proxyProtocolAnnotation != "" { + if proxyProtocolAnnotation != "*" { + return nil, fmt.Errorf("annotation %q=%q detected, but the only value supported currently is '*'", ServiceAnnotationLoadBalancerProxyProtocol, proxyProtocolAnnotation) + } + proxyProtocol = true + } + + // Some load balancer attributes are required, so defaults are set. These can be overridden by annotations. 
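+ // For example, the annotation
+ // service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "300"
+ // overrides the 60-second idle timeout default set below.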
+ loadBalancerAttributes := &elb.LoadBalancerAttributes{ + AccessLog: &elb.AccessLog{Enabled: aws.Bool(false)}, + ConnectionDraining: &elb.ConnectionDraining{Enabled: aws.Bool(false)}, + ConnectionSettings: &elb.ConnectionSettings{IdleTimeout: aws.Int64(60)}, + CrossZoneLoadBalancing: &elb.CrossZoneLoadBalancing{Enabled: aws.Bool(false)}, + } + + // Determine if an access log emit interval has been specified + accessLogEmitIntervalAnnotation := annotations[ServiceAnnotationLoadBalancerAccessLogEmitInterval] + if accessLogEmitIntervalAnnotation != "" { + accessLogEmitInterval, err := strconv.ParseInt(accessLogEmitIntervalAnnotation, 10, 64) + if err != nil { + return nil, fmt.Errorf("error parsing service annotation: %s=%s", + ServiceAnnotationLoadBalancerAccessLogEmitInterval, + accessLogEmitIntervalAnnotation, + ) + } + loadBalancerAttributes.AccessLog.EmitInterval = &accessLogEmitInterval + } + + // Determine if access log enabled/disabled has been specified + accessLogEnabledAnnotation := annotations[ServiceAnnotationLoadBalancerAccessLogEnabled] + if accessLogEnabledAnnotation != "" { + accessLogEnabled, err := strconv.ParseBool(accessLogEnabledAnnotation) + if err != nil { + return nil, fmt.Errorf("error parsing service annotation: %s=%s", + ServiceAnnotationLoadBalancerAccessLogEnabled, + accessLogEnabledAnnotation, + ) + } + loadBalancerAttributes.AccessLog.Enabled = &accessLogEnabled + } + + // Determine if access log s3 bucket name has been specified + accessLogS3BucketNameAnnotation := annotations[ServiceAnnotationLoadBalancerAccessLogS3BucketName] + if accessLogS3BucketNameAnnotation != "" { + loadBalancerAttributes.AccessLog.S3BucketName = &accessLogS3BucketNameAnnotation + } + + // Determine if access log s3 bucket prefix has been specified + accessLogS3BucketPrefixAnnotation := annotations[ServiceAnnotationLoadBalancerAccessLogS3BucketPrefix] + if accessLogS3BucketPrefixAnnotation != "" { + loadBalancerAttributes.AccessLog.S3BucketPrefix = &accessLogS3BucketPrefixAnnotation + } + + // Determine if connection draining enabled/disabled has been specified + connectionDrainingEnabledAnnotation := annotations[ServiceAnnotationLoadBalancerConnectionDrainingEnabled] + if connectionDrainingEnabledAnnotation != "" { + connectionDrainingEnabled, err := strconv.ParseBool(connectionDrainingEnabledAnnotation) + if err != nil { + return nil, fmt.Errorf("error parsing service annotation: %s=%s", + ServiceAnnotationLoadBalancerConnectionDrainingEnabled, + connectionDrainingEnabledAnnotation, + ) + } + loadBalancerAttributes.ConnectionDraining.Enabled = &connectionDrainingEnabled + } + + // Determine if connection draining timeout has been specified + connectionDrainingTimeoutAnnotation := annotations[ServiceAnnotationLoadBalancerConnectionDrainingTimeout] + if connectionDrainingTimeoutAnnotation != "" { + connectionDrainingTimeout, err := strconv.ParseInt(connectionDrainingTimeoutAnnotation, 10, 64) + if err != nil { + return nil, fmt.Errorf("error parsing service annotation: %s=%s", + ServiceAnnotationLoadBalancerConnectionDrainingTimeout, + connectionDrainingTimeoutAnnotation, + ) + } + loadBalancerAttributes.ConnectionDraining.Timeout = &connectionDrainingTimeout + } + + // Determine if connection idle timeout has been specified + connectionIdleTimeoutAnnotation := annotations[ServiceAnnotationLoadBalancerConnectionIdleTimeout] + if connectionIdleTimeoutAnnotation != "" { + connectionIdleTimeout, err := strconv.ParseInt(connectionIdleTimeoutAnnotation, 10, 64) + if err != nil { + return 
nil, fmt.Errorf("error parsing service annotation: %s=%s", + ServiceAnnotationLoadBalancerConnectionIdleTimeout, + connectionIdleTimeoutAnnotation, + ) + } + loadBalancerAttributes.ConnectionSettings.IdleTimeout = &connectionIdleTimeout + } + + // Determine if cross zone load balancing enabled/disabled has been specified + crossZoneLoadBalancingEnabledAnnotation := annotations[ServiceAnnotationLoadBalancerCrossZoneLoadBalancingEnabled] + if crossZoneLoadBalancingEnabledAnnotation != "" { + crossZoneLoadBalancingEnabled, err := strconv.ParseBool(crossZoneLoadBalancingEnabledAnnotation) + if err != nil { + return nil, fmt.Errorf("error parsing service annotation: %s=%s", + ServiceAnnotationLoadBalancerCrossZoneLoadBalancingEnabled, + crossZoneLoadBalancingEnabledAnnotation, + ) + } + loadBalancerAttributes.CrossZoneLoadBalancing.Enabled = &crossZoneLoadBalancingEnabled + } + + // Find the subnets that the ELB will live in + subnetIDs, err := c.getLoadBalancerSubnets(apiService, internalELB) + if err != nil { + klog.Errorf("Error listing subnets in VPC: %q", err) + return nil, err + } + + // Bail out early if there are no subnets + if len(subnetIDs) == 0 { + return nil, fmt.Errorf("could not find any suitable subnets for creating the ELB") + } + + loadBalancerName := c.GetLoadBalancerName(ctx, clusterName, apiService) + serviceName := types.NamespacedName{Namespace: apiService.Namespace, Name: apiService.Name} + securityGroupIDs, setupSg, err := c.buildELBSecurityGroupList(serviceName, loadBalancerName, annotations) + if err != nil { + return nil, err + } + if len(securityGroupIDs) == 0 { + return nil, fmt.Errorf("[BUG] ELB can't have empty list of Security Groups to be assigned, this is a Kubernetes bug, please report") + } + + if setupSg { + ec2SourceRanges := []*ec2.IpRange{} + for _, sourceRange := range sourceRanges.StringSlice() { + ec2SourceRanges = append(ec2SourceRanges, &ec2.IpRange{CidrIp: aws.String(sourceRange)}) + } + + permissions := NewIPPermissionSet() + for _, port := range apiService.Spec.Ports { + portInt64 := int64(port.Port) + protocol := strings.ToLower(string(port.Protocol)) + + permission := &ec2.IpPermission{} + permission.FromPort = &portInt64 + permission.ToPort = &portInt64 + permission.IpRanges = ec2SourceRanges + permission.IpProtocol = &protocol + + permissions.Insert(permission) + } + + // Allow ICMP fragmentation packets, important for MTU discovery + { + permission := &ec2.IpPermission{ + IpProtocol: aws.String("icmp"), + FromPort: aws.Int64(3), + ToPort: aws.Int64(4), + IpRanges: ec2SourceRanges, + } + + permissions.Insert(permission) + } + _, err = c.setSecurityGroupIngress(securityGroupIDs[0], permissions) + if err != nil { + return nil, err + } + } + + // Build the load balancer itself + loadBalancer, err := c.ensureLoadBalancer( + serviceName, + loadBalancerName, + listeners, + subnetIDs, + securityGroupIDs, + internalELB, + proxyProtocol, + loadBalancerAttributes, + annotations, + ) + if err != nil { + return nil, err + } + + if sslPolicyName, ok := annotations[ServiceAnnotationLoadBalancerSSLNegotiationPolicy]; ok { + err := c.ensureSSLNegotiationPolicy(loadBalancer, sslPolicyName) + if err != nil { + return nil, err + } + + for _, port := range c.getLoadBalancerTLSPorts(loadBalancer) { + err := c.setSSLNegotiationPolicy(loadBalancerName, sslPolicyName, port) + if err != nil { + return nil, err + } + } + } + + // We only configure a TCP health-check on the first port + var tcpHealthCheckPort int32 + for _, listener := range listeners { + if 
listener.InstancePort == nil { + continue + } + tcpHealthCheckPort = int32(*listener.InstancePort) + break + } + if path, healthCheckNodePort := servicehelpers.GetServiceHealthCheckPathPort(apiService); path != "" { + klog.V(4).Infof("service %v (%v) needs health checks on :%d%s)", apiService.Name, loadBalancerName, healthCheckNodePort, path) + if annotations[ServiceAnnotationLoadBalancerHealthCheckPort] == defaultHealthCheckPort { + healthCheckNodePort = tcpHealthCheckPort + } + err = c.ensureLoadBalancerHealthCheck(loadBalancer, "HTTP", healthCheckNodePort, path, annotations) + if err != nil { + return nil, fmt.Errorf("Failed to ensure health check for localized service %v on node port %v: %q", loadBalancerName, healthCheckNodePort, err) + } + } else { + klog.V(4).Infof("service %v does not need custom health checks", apiService.Name) + annotationProtocol := strings.ToLower(annotations[ServiceAnnotationLoadBalancerBEProtocol]) + var hcProtocol string + if annotationProtocol == "https" || annotationProtocol == "ssl" { + hcProtocol = "SSL" + } else { + hcProtocol = "TCP" + } + // there must be no path on TCP health check + err = c.ensureLoadBalancerHealthCheck(loadBalancer, hcProtocol, tcpHealthCheckPort, "", annotations) + if err != nil { + return nil, err + } + } + + err = c.updateInstanceSecurityGroupsForLoadBalancer(loadBalancer, instances, annotations) + if err != nil { + klog.Warningf("Error opening ingress rules for the load balancer to the instances: %q", err) + return nil, err + } + + err = c.ensureLoadBalancerInstances(aws.StringValue(loadBalancer.LoadBalancerName), loadBalancer.Instances, instances) + if err != nil { + klog.Warningf("Error registering instances with the load balancer: %q", err) + return nil, err + } + + klog.V(1).Infof("Loadbalancer %s (%v) has DNS name %s", loadBalancerName, serviceName, aws.StringValue(loadBalancer.DNSName)) + + // TODO: Wait for creation? + + status := toStatus(loadBalancer) + return status, nil +} + +// GetLoadBalancer is an implementation of LoadBalancer.GetLoadBalancer +func (c *Cloud) GetLoadBalancer(ctx context.Context, clusterName string, service *v1.Service) (*v1.LoadBalancerStatus, bool, error) { + if isLBExternal(service.Annotations) { + return nil, false, nil + } + loadBalancerName := c.GetLoadBalancerName(ctx, clusterName, service) + + if isNLB(service.Annotations) { + lb, err := c.describeLoadBalancerv2(loadBalancerName) + if err != nil { + return nil, false, err + } + if lb == nil { + return nil, false, nil + } + return v2toStatus(lb), true, nil + } + + lb, err := c.describeLoadBalancer(loadBalancerName) + if err != nil { + return nil, false, err + } + + if lb == nil { + return nil, false, nil + } + + status := toStatus(lb) + return status, true, nil +} + +// GetLoadBalancerName is an implementation of LoadBalancer.GetLoadBalancerName +func (c *Cloud) GetLoadBalancerName(ctx context.Context, clusterName string, service *v1.Service) string { + // TODO: replace DefaultLoadBalancerName to generate more meaningful loadbalancer names. 
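+ // DefaultLoadBalancerName derives the name from the service UID: "a" is
+ // prepended, dashes are stripped, and the result is truncated to 32
+ // characters (e.g. a hypothetical UID "0a1b2c3d-..." yields "a0a1b2c3d...").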
+ return cloudprovider.DefaultLoadBalancerName(service) +} + +func toStatus(lb *elb.LoadBalancerDescription) *v1.LoadBalancerStatus { + status := &v1.LoadBalancerStatus{} + + if aws.StringValue(lb.DNSName) != "" { + var ingress v1.LoadBalancerIngress + ingress.Hostname = aws.StringValue(lb.DNSName) + status.Ingress = []v1.LoadBalancerIngress{ingress} + } + + return status +} + +func v2toStatus(lb *elbv2.LoadBalancer) *v1.LoadBalancerStatus { + status := &v1.LoadBalancerStatus{} + if lb == nil { + klog.Error("[BUG] v2toStatus got nil input, this is a Kubernetes bug, please report") + return status + } + + // We check for Active or Provisioning, the only successful statuses + if aws.StringValue(lb.DNSName) != "" && (aws.StringValue(lb.State.Code) == elbv2.LoadBalancerStateEnumActive || + aws.StringValue(lb.State.Code) == elbv2.LoadBalancerStateEnumProvisioning) { + var ingress v1.LoadBalancerIngress + ingress.Hostname = aws.StringValue(lb.DNSName) + status.Ingress = []v1.LoadBalancerIngress{ingress} + } + + return status +} + +// Returns the first security group for an instance, or nil +// We only create instances with one security group, so we don't expect multiple security groups. +// However, if there are multiple security groups, we will choose the one tagged with our cluster filter. +// Otherwise we will return an error. +func findSecurityGroupForInstance(instance *ec2.Instance, taggedSecurityGroups map[string]*ec2.SecurityGroup) (*ec2.GroupIdentifier, error) { + instanceID := aws.StringValue(instance.InstanceId) + + var tagged []*ec2.GroupIdentifier + var untagged []*ec2.GroupIdentifier + for _, group := range instance.SecurityGroups { + groupID := aws.StringValue(group.GroupId) + if groupID == "" { + klog.Warningf("Ignoring security group without id for instance %q: %v", instanceID, group) + continue + } + _, isTagged := taggedSecurityGroups[groupID] + if isTagged { + tagged = append(tagged, group) + } else { + untagged = append(untagged, group) + } + } + + if len(tagged) > 0 { + // We create instances with one SG + // If users create multiple SGs, they must tag one of them as being k8s owned + if len(tagged) != 1 { + taggedGroups := "" + for _, v := range tagged { + taggedGroups += fmt.Sprintf("%s(%s) ", *v.GroupId, *v.GroupName) + } + return nil, fmt.Errorf("Multiple tagged security groups found for instance %s; ensure only the k8s security group is tagged; the tagged groups were %v", instanceID, taggedGroups) + } + return tagged[0], nil + } + + if len(untagged) > 0 { + // For back-compat, we will allow a single untagged SG + if len(untagged) != 1 { + return nil, fmt.Errorf("Multiple untagged security groups found for instance %s; ensure the k8s security group is tagged", instanceID) + } + return untagged[0], nil + } + + klog.Warningf("No security group found for instance %q", instanceID) + return nil, nil +} + +// Return all the security groups that are tagged as being part of our cluster +func (c *Cloud) getTaggedSecurityGroups() (map[string]*ec2.SecurityGroup, error) { + request := &ec2.DescribeSecurityGroupsInput{} + groups, err := c.ec2.DescribeSecurityGroups(request) + if err != nil { + return nil, fmt.Errorf("error querying security groups: %q", err) + } + + m := make(map[string]*ec2.SecurityGroup) + for _, group := range groups { + if !c.tagging.hasClusterTag(group.Tags) { + continue + } + + id := aws.StringValue(group.GroupId) + if id == "" { + klog.Warningf("Ignoring group without id: %v", group) + continue + } + m[id] = group + } + return m, nil +} + +// Open security 
group ingress rules on the instances so that the load balancer can talk to them +// Will also remove any security groups ingress rules for the load balancer that are _not_ needed for allInstances +func (c *Cloud) updateInstanceSecurityGroupsForLoadBalancer(lb *elb.LoadBalancerDescription, instances map[InstanceID]*ec2.Instance, annotations map[string]string) error { + if c.cfg.Global.DisableSecurityGroupIngress { + return nil + } + + // Determine the load balancer security group id + lbSecurityGroupIDs := aws.StringValueSlice(lb.SecurityGroups) + if len(lbSecurityGroupIDs) == 0 { + return fmt.Errorf("could not determine security group for load balancer: %s", aws.StringValue(lb.LoadBalancerName)) + } + c.sortELBSecurityGroupList(lbSecurityGroupIDs, annotations) + loadBalancerSecurityGroupID := lbSecurityGroupIDs[0] + + // Get the actual list of groups that allow ingress from the load-balancer + var actualGroups []*ec2.SecurityGroup + { + describeRequest := &ec2.DescribeSecurityGroupsInput{} + describeRequest.Filters = []*ec2.Filter{ + newEc2Filter("ip-permission.group-id", loadBalancerSecurityGroupID), + } + response, err := c.ec2.DescribeSecurityGroups(describeRequest) + if err != nil { + return fmt.Errorf("error querying security groups for ELB: %q", err) + } + for _, sg := range response { + if !c.tagging.hasClusterTag(sg.Tags) { + continue + } + actualGroups = append(actualGroups, sg) + } + } + + taggedSecurityGroups, err := c.getTaggedSecurityGroups() + if err != nil { + return fmt.Errorf("error querying for tagged security groups: %q", err) + } + + // Open the firewall from the load balancer to the instance + // We don't actually have a trivial way to know in advance which security group the instance is in + // (it is probably the node security group, but we don't easily have that). + // However, we _do_ have the list of security groups on the instance records. 
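+ // Hedged sketch of the reconciliation that follows (hypothetical IDs): if
+ // the instances resolve to sg-node while {sg-node, sg-stale} currently allow
+ // ingress from the LB, sg-node is dropped from the change map (already in
+ // place) and sg-stale is marked false, so only the stale rule gets revoked.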
+ + // Map containing the changes we want to make; true to add, false to remove + instanceSecurityGroupIds := map[string]bool{} + + // Scan instances for groups we want open + for _, instance := range instances { + securityGroup, err := findSecurityGroupForInstance(instance, taggedSecurityGroups) + if err != nil { + return err + } + + if securityGroup == nil { + klog.Warning("Ignoring instance without security group: ", aws.StringValue(instance.InstanceId)) + continue + } + id := aws.StringValue(securityGroup.GroupId) + if id == "" { + klog.Warningf("found security group without id: %v", securityGroup) + continue + } + + instanceSecurityGroupIds[id] = true + } + + // Compare to actual groups + for _, actualGroup := range actualGroups { + actualGroupID := aws.StringValue(actualGroup.GroupId) + if actualGroupID == "" { + klog.Warning("Ignoring group without ID: ", actualGroup) + continue + } + + adding, found := instanceSecurityGroupIds[actualGroupID] + if found && adding { + // We don't need to make a change; the permission is already in place + delete(instanceSecurityGroupIds, actualGroupID) + } else { + // This group is not needed by allInstances; delete it + instanceSecurityGroupIds[actualGroupID] = false + } + } + + for instanceSecurityGroupID, add := range instanceSecurityGroupIds { + if add { + klog.V(2).Infof("Adding rule for traffic from the load balancer (%s) to instances (%s)", loadBalancerSecurityGroupID, instanceSecurityGroupID) + } else { + klog.V(2).Infof("Removing rule for traffic from the load balancer (%s) to instance (%s)", loadBalancerSecurityGroupID, instanceSecurityGroupID) + } + sourceGroupID := &ec2.UserIdGroupPair{} + sourceGroupID.GroupId = &loadBalancerSecurityGroupID + + allProtocols := "-1" + + permission := &ec2.IpPermission{} + permission.IpProtocol = &allProtocols + permission.UserIdGroupPairs = []*ec2.UserIdGroupPair{sourceGroupID} + + permissions := []*ec2.IpPermission{permission} + + if add { + changed, err := c.addSecurityGroupIngress(instanceSecurityGroupID, permissions) + if err != nil { + return err + } + if !changed { + klog.Warning("Allowing ingress was not needed; concurrent change? groupId=", instanceSecurityGroupID) + } + } else { + changed, err := c.removeSecurityGroupIngress(instanceSecurityGroupID, permissions) + if err != nil { + return err + } + if !changed { + klog.Warning("Revoking ingress was not needed; concurrent change? groupId=", instanceSecurityGroupID) + } + } + } + + return nil +} + +// EnsureLoadBalancerDeleted implements LoadBalancer.EnsureLoadBalancerDeleted. +func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName string, service *v1.Service) error { + if isLBExternal(service.Annotations) { + return nil + } + loadBalancerName := c.GetLoadBalancerName(ctx, clusterName, service) + + if isNLB(service.Annotations) { + lb, err := c.describeLoadBalancerv2(loadBalancerName) + if err != nil { + return err + } + if lb == nil { + klog.Info("Load balancer already deleted: ", loadBalancerName) + return nil + } + + // Delete the LoadBalancer and target groups + // + // Deleting a target group while associated with a load balancer will + // fail. We delete the loadbalancer first. 
This does leave the + // possibility of zombie target groups if DeleteLoadBalancer() fails + // + // * Get target groups for NLB + // * Delete Load Balancer + // * Delete target groups + // * Clean up SecurityGroupRules + { + + targetGroups, err := c.elbv2.DescribeTargetGroups( + &elbv2.DescribeTargetGroupsInput{LoadBalancerArn: lb.LoadBalancerArn}, + ) + if err != nil { + return fmt.Errorf("error listing target groups before deleting load balancer: %q", err) + } + + _, err = c.elbv2.DeleteLoadBalancer( + &elbv2.DeleteLoadBalancerInput{LoadBalancerArn: lb.LoadBalancerArn}, + ) + if err != nil { + return fmt.Errorf("error deleting load balancer %q: %v", loadBalancerName, err) + } + + for _, group := range targetGroups.TargetGroups { + _, err := c.elbv2.DeleteTargetGroup( + &elbv2.DeleteTargetGroupInput{TargetGroupArn: group.TargetGroupArn}, + ) + if err != nil { + return fmt.Errorf("error deleting target groups after deleting load balancer: %q", err) + } + } + } + + return c.updateInstanceSecurityGroupsForNLB(loadBalancerName, nil, nil, nil, nil) + } + + lb, err := c.describeLoadBalancer(loadBalancerName) + if err != nil { + return err + } + + if lb == nil { + klog.Info("Load balancer already deleted: ", loadBalancerName) + return nil + } + + { + // De-authorize the load balancer security group from the instances security group + err = c.updateInstanceSecurityGroupsForLoadBalancer(lb, nil, service.Annotations) + if err != nil { + klog.Errorf("Error deregistering load balancer from instance security groups: %q", err) + return err + } + } + + { + // Delete the load balancer itself + request := &elb.DeleteLoadBalancerInput{} + request.LoadBalancerName = lb.LoadBalancerName + + _, err = c.elb.DeleteLoadBalancer(request) + if err != nil { + // TODO: Check if error was because load balancer was concurrently deleted + klog.Errorf("Error deleting load balancer: %q", err) + return err + } + } + + { + // Delete the security group(s) for the load balancer + // Note that this is annoying: the load balancer disappears from the API immediately, but it is still + // deleting in the background. We get a DependencyViolation until the load balancer has deleted itself + + var loadBalancerSGs = aws.StringValueSlice(lb.SecurityGroups) + + describeRequest := &ec2.DescribeSecurityGroupsInput{} + describeRequest.Filters = []*ec2.Filter{ + newEc2Filter("group-id", loadBalancerSGs...), + } + response, err := c.ec2.DescribeSecurityGroups(describeRequest) + if err != nil { + return fmt.Errorf("error querying security groups for ELB: %q", err) + } + + // Collect the security groups to delete + securityGroupIDs := map[string]struct{}{} + annotatedSgSet := map[string]bool{} + annotatedSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerSecurityGroups]) + annotatedExtraSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups]) + annotatedSgsList = append(annotatedSgsList, annotatedExtraSgsList...) + + for _, sg := range annotatedSgsList { + annotatedSgSet[sg] = true + } + + for _, sg := range response { + sgID := aws.StringValue(sg.GroupId) + + if sgID == c.cfg.Global.ElbSecurityGroup { + //We don't want to delete a security group that was defined in the Cloud Configuration. 
+				continue
+			}
+			if sgID == "" {
+				klog.Warningf("Ignoring empty security group in %s", service.Name)
+				continue
+			}
+
+			if !c.tagging.hasClusterTag(sg.Tags) {
+				klog.Warningf("Ignoring security group with no cluster tag in %s", service.Name)
+				continue
+			}
+
+			// This is extra protection against deleting a security group that was not
+			// provisioned by this controller but is referenced via the
+			// `service.beta.kubernetes.io/aws-load-balancer-security-groups` or
+			// `service.beta.kubernetes.io/aws-load-balancer-extra-security-groups` annotation.
+			if _, ok := annotatedSgSet[sgID]; ok {
+				klog.Warningf("Ignoring security group annotated via `service.beta.kubernetes.io/aws-load-balancer-security-groups` or `service.beta.kubernetes.io/aws-load-balancer-extra-security-groups` in %s", service.Name)
+				continue
+			}
+
+			securityGroupIDs[sgID] = struct{}{}
+		}
+
+		// Loop through and try to delete them
+		timeoutAt := time.Now().Add(time.Second * 600)
+		for {
+			for securityGroupID := range securityGroupIDs {
+				request := &ec2.DeleteSecurityGroupInput{}
+				request.GroupId = &securityGroupID
+				_, err := c.ec2.DeleteSecurityGroup(request)
+				if err == nil {
+					delete(securityGroupIDs, securityGroupID)
+				} else {
+					ignore := false
+					if awsError, ok := err.(awserr.Error); ok {
+						if awsError.Code() == "DependencyViolation" {
+							klog.V(2).Infof("Ignoring DependencyViolation while deleting load-balancer security group (%s), assuming because LB is in process of deleting", securityGroupID)
+							ignore = true
+						}
+					}
+					if !ignore {
+						return fmt.Errorf("error while deleting load balancer security group (%s): %q", securityGroupID, err)
+					}
+				}
+			}
+
+			if len(securityGroupIDs) == 0 {
+				klog.V(2).Info("Deleted all security groups for load balancer: ", service.Name)
+				break
+			}
+
+			if time.Now().After(timeoutAt) {
+				ids := []string{}
+				for id := range securityGroupIDs {
+					ids = append(ids, id)
+				}
+
+				return fmt.Errorf("timed out deleting ELB: %s. Could not delete security groups %v", service.Name, strings.Join(ids, ","))
+			}
+
+			klog.V(2).Info("Waiting for load-balancer to delete so we can delete security groups: ", service.Name)
+
+			time.Sleep(10 * time.Second)
+		}
+	}
+
+	return nil
+}
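+
+// The retry loop above is the general pattern for deleting a security group
+// that a load balancer, still tearing down in the background, may reference.
+// As a standalone sketch (a hypothetical helper, not used by this code):
+//
+//	func deleteSecurityGroupWhenUnused(client EC2, groupID string, deadline time.Time) error {
+//		for {
+//			_, err := client.DeleteSecurityGroup(&ec2.DeleteSecurityGroupInput{GroupId: aws.String(groupID)})
+//			if err == nil {
+//				return nil
+//			}
+//			if awsErr, ok := err.(awserr.Error); !ok || awsErr.Code() != "DependencyViolation" {
+//				return err
+//			}
+//			if time.Now().After(deadline) {
+//				return fmt.Errorf("timed out waiting to delete security group %s", groupID)
+//			}
+//			time.Sleep(10 * time.Second)
+//		}
+//	}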
+
+// UpdateLoadBalancer implements LoadBalancer.UpdateLoadBalancer
+func (c *Cloud) UpdateLoadBalancer(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) error {
+	if isLBExternal(service.Annotations) {
+		return cloudprovider.ImplementedElsewhere
+	}
+	instances, err := c.findInstancesForELB(nodes, service.Annotations)
+	if err != nil {
+		return err
+	}
+	loadBalancerName := c.GetLoadBalancerName(ctx, clusterName, service)
+	if isNLB(service.Annotations) {
+		lb, err := c.describeLoadBalancerv2(loadBalancerName)
+		if err != nil {
+			return err
+		}
+		if lb == nil {
+			return fmt.Errorf("load balancer not found")
+		}
+		_, err = c.EnsureLoadBalancer(ctx, clusterName, service, nodes)
+		return err
+	}
+	lb, err := c.describeLoadBalancer(loadBalancerName)
+	if err != nil {
+		return err
+	}
+
+	if lb == nil {
+		return fmt.Errorf("load balancer not found")
+	}
+
+	if sslPolicyName, ok := service.Annotations[ServiceAnnotationLoadBalancerSSLNegotiationPolicy]; ok {
+		err := c.ensureSSLNegotiationPolicy(lb, sslPolicyName)
+		if err != nil {
+			return err
+		}
+		for _, port := range c.getLoadBalancerTLSPorts(lb) {
+			err := c.setSSLNegotiationPolicy(loadBalancerName, sslPolicyName, port)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	err = c.ensureLoadBalancerInstances(aws.StringValue(lb.LoadBalancerName), lb.Instances, instances)
+	if err != nil {
+		return err
+	}
+
+	err = c.updateInstanceSecurityGroupsForLoadBalancer(lb, instances, service.Annotations)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Returns the instance with the specified ID
+func (c *Cloud) getInstanceByID(instanceID string) (*ec2.Instance, error) {
+	instances, err := c.getInstancesByIDs([]*string{&instanceID})
+	if err != nil {
+		return nil, err
+	}
+
+	if len(instances) == 0 {
+		return nil, cloudprovider.InstanceNotFound
+	}
+	if len(instances) > 1 {
+		return nil, fmt.Errorf("multiple instances found for instance: %s", instanceID)
+	}
+
+	return instances[instanceID], nil
+}
+
+func (c *Cloud) getInstancesByIDs(instanceIDs []*string) (map[string]*ec2.Instance, error) {
+	instancesByID := make(map[string]*ec2.Instance)
+	if len(instanceIDs) == 0 {
+		return instancesByID, nil
+	}
+
+	request := &ec2.DescribeInstancesInput{
+		InstanceIds: instanceIDs,
+	}
+
+	instances, err := c.ec2.DescribeInstances(request)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, instance := range instances {
+		instanceID := aws.StringValue(instance.InstanceId)
+		if instanceID == "" {
+			continue
+		}
+
+		instancesByID[instanceID] = instance
+	}
+
+	return instancesByID, nil
+}
+
+func (c *Cloud) getInstancesByNodeNames(nodeNames []string, states ...string) ([]*ec2.Instance, error) {
+	names := aws.StringSlice(nodeNames)
+	ec2Instances := []*ec2.Instance{}
+
+	for i := 0; i < len(names); i += filterNodeLimit {
+		end := i + filterNodeLimit
+		if end > len(names) {
+			end = len(names)
+		}
+
+		nameSlice := names[i:end]
+
+		nodeNameFilter := &ec2.Filter{
+			Name:   aws.String("private-dns-name"),
+			Values: nameSlice,
+		}
+
+		filters := []*ec2.Filter{nodeNameFilter}
+		if len(states) > 0 {
+			filters = append(filters, newEc2Filter("instance-state-name", states...))
+		}
+
+		instances, err := c.describeInstances(filters)
+		if err != nil {
+			klog.V(2).Infof("Failed to describe instances %v",
nodeNames) + return nil, err + } + ec2Instances = append(ec2Instances, instances...) + } + + if len(ec2Instances) == 0 { + klog.V(3).Infof("Failed to find any instances %v", nodeNames) + return nil, nil + } + return ec2Instances, nil +} + +// TODO: Move to instanceCache +func (c *Cloud) describeInstances(filters []*ec2.Filter) ([]*ec2.Instance, error) { + request := &ec2.DescribeInstancesInput{ + Filters: filters, + } + + response, err := c.ec2.DescribeInstances(request) + if err != nil { + return nil, err + } + + var matches []*ec2.Instance + for _, instance := range response { + if c.tagging.hasClusterTag(instance.Tags) { + matches = append(matches, instance) + } + } + return matches, nil +} + +// mapNodeNameToPrivateDNSName maps a k8s NodeName to an AWS Instance PrivateDNSName +// This is a simple string cast +func mapNodeNameToPrivateDNSName(nodeName types.NodeName) string { + return string(nodeName) +} + +// mapInstanceToNodeName maps a EC2 instance to a k8s NodeName, by extracting the PrivateDNSName +func mapInstanceToNodeName(i *ec2.Instance) types.NodeName { + return types.NodeName(aws.StringValue(i.PrivateDnsName)) +} + +var aliveFilter = []string{ + ec2.InstanceStateNamePending, + ec2.InstanceStateNameRunning, + ec2.InstanceStateNameShuttingDown, + ec2.InstanceStateNameStopping, + ec2.InstanceStateNameStopped, +} + +// Returns the instance with the specified node name +// Returns nil if it does not exist +func (c *Cloud) findInstanceByNodeName(nodeName types.NodeName) (*ec2.Instance, error) { + privateDNSName := mapNodeNameToPrivateDNSName(nodeName) + filters := []*ec2.Filter{ + newEc2Filter("private-dns-name", privateDNSName), + // exclude instances in "terminated" state + newEc2Filter("instance-state-name", aliveFilter...), + } + + instances, err := c.describeInstances(filters) + if err != nil { + return nil, err + } + + if len(instances) == 0 { + return nil, nil + } + if len(instances) > 1 { + return nil, fmt.Errorf("multiple instances found for name: %s", nodeName) + } + return instances[0], nil +} + +// Returns the instance with the specified node name +// Like findInstanceByNodeName, but returns error if node not found +func (c *Cloud) getInstanceByNodeName(nodeName types.NodeName) (*ec2.Instance, error) { + var instance *ec2.Instance + + // we leverage node cache to try to retrieve node's provider id first, as + // get instance by provider id is way more efficient than by filters in + // aws context + awsID, err := c.nodeNameToProviderID(nodeName) + if err != nil { + klog.V(3).Infof("Unable to convert node name %q to aws instanceID, fall back to findInstanceByNodeName: %v", nodeName, err) + instance, err = c.findInstanceByNodeName(nodeName) + } else { + instance, err = c.getInstanceByID(string(awsID)) + } + if err == nil && instance == nil { + return nil, cloudprovider.InstanceNotFound + } + return instance, err +} + +func (c *Cloud) getFullInstance(nodeName types.NodeName) (*awsInstance, *ec2.Instance, error) { + if nodeName == "" { + instance, err := c.getInstanceByID(c.selfAWSInstance.awsID) + return c.selfAWSInstance, instance, err + } + instance, err := c.getInstanceByNodeName(nodeName) + if err != nil { + return nil, nil, err + } + awsInstance := newAWSInstance(c.ec2, instance) + return awsInstance, instance, err +} + +// isFargateNode returns true if given node runs on Fargate compute +func isFargateNode(nodeName string) bool { + return strings.HasPrefix(nodeName, fargateNodeNamePrefix) +} + +func (c *Cloud) nodeNameToProviderID(nodeName types.NodeName) (InstanceID, 
error) { + if strings.HasPrefix(string(nodeName), rbnNamePrefix) { + return InstanceID(nodeName), nil + } + if len(nodeName) == 0 { + return "", fmt.Errorf("no nodeName provided") + } + + if c.nodeInformerHasSynced == nil || !c.nodeInformerHasSynced() { + return "", fmt.Errorf("node informer has not synced yet") + } + + node, err := c.nodeInformer.Lister().Get(string(nodeName)) + if err != nil { + return "", err + } + if len(node.Spec.ProviderID) == 0 { + return "", fmt.Errorf("node has no providerID") + } + + return KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() +} + +func checkMixedProtocol(ports []v1.ServicePort) error { + if len(ports) == 0 { + return nil + } + firstProtocol := ports[0].Protocol + for _, port := range ports[1:] { + if port.Protocol != firstProtocol { + return fmt.Errorf("mixed protocol is not supported for LoadBalancer") + } + } + return nil +} + +func checkProtocol(port v1.ServicePort, annotations map[string]string) error { + // nlb supports tcp, udp + if isNLB(annotations) && (port.Protocol == v1.ProtocolTCP || port.Protocol == v1.ProtocolUDP) { + return nil + } + // elb only supports tcp + if !isNLB(annotations) && port.Protocol == v1.ProtocolTCP { + return nil + } + return fmt.Errorf("Protocol %s not supported by LoadBalancer", port.Protocol) +} + +func setNodeDisk( + nodeDiskMap map[types.NodeName]map[KubernetesVolumeID]bool, + volumeID KubernetesVolumeID, + nodeName types.NodeName, + check bool) { + + volumeMap := nodeDiskMap[nodeName] + + if volumeMap == nil { + volumeMap = make(map[KubernetesVolumeID]bool) + nodeDiskMap[nodeName] = volumeMap + } + volumeMap[volumeID] = check +} + +func getInitialAttachDetachDelay(status string) time.Duration { + if status == volumeDetachedStatus { + return volumeDetachmentStatusInitialDelay + } + return volumeAttachmentStatusInitialDelay +} + +// describeNetworkInterfaces returns network interface information for the given DNS name. +func (c *Cloud) describeNetworkInterfaces(nodeName string) (*ec2.NetworkInterface, error) { + eniEndpoint := strings.TrimPrefix(nodeName, fargateNodeNamePrefix) + + filters := []*ec2.Filter{ + newEc2Filter("attachment.status", "attached"), + newEc2Filter("vpc-id", c.vpcID), + } + + // when enableDnsSupport is set to false in a VPC, interface will not have private DNS names. 
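+	// The endpoint extracted from the node name is therefore either a private
+	// DNS name or a bare private IP address; pick the matching EC2 filter.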
+ if strings.HasPrefix(eniEndpoint, privateDNSNamePrefix) { + filters = append(filters, newEc2Filter("private-dns-name", eniEndpoint)) + } else { + filters = append(filters, newEc2Filter("private-ip-address", eniEndpoint)) + } + + request := &ec2.DescribeNetworkInterfacesInput{ + Filters: filters, + } + + eni, err := c.ec2.DescribeNetworkInterfaces(request) + if err != nil { + return nil, err + } + if len(eni.NetworkInterfaces) == 0 { + return nil, nil + } + if len(eni.NetworkInterfaces) != 1 { + // This should not be possible - ids should be unique + return nil, fmt.Errorf("multiple interfaces found with same id %q", eni.NetworkInterfaces) + } + return eni.NetworkInterfaces[0], nil +} + +func getRegionFromMetadata(cfg CloudConfig, metadata EC2Metadata) (string, string, error) { + klog.Infof("Get AWS region from metadata client") + err := updateConfigZone(&cfg, metadata) + if err != nil { + return "", "", fmt.Errorf("unable to determine AWS zone from cloud provider config or EC2 instance metadata: %v", err) + } + + zone := cfg.Global.Zone + if len(zone) <= 1 { + return "", "", fmt.Errorf("invalid AWS zone in config file: %s", zone) + } + + regionName, err := azToRegion(zone) + if err != nil { + return "", "", err + } + + return regionName, zone, nil +} diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go new file mode 100644 index 0000000000..48eb4e942a --- /dev/null +++ b/pkg/providers/v1/aws_fakes.go @@ -0,0 +1,716 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package aws
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/autoscaling"
+	"github.com/aws/aws-sdk-go/service/ec2"
+	"github.com/aws/aws-sdk-go/service/elb"
+	"github.com/aws/aws-sdk-go/service/elbv2"
+	"github.com/aws/aws-sdk-go/service/kms"
+	"k8s.io/klog/v2"
+)
+
+// FakeAWSServices is a fake AWS session used for testing
+type FakeAWSServices struct {
+	region                      string
+	instances                   []*ec2.Instance
+	selfInstance                *ec2.Instance
+	networkInterfacesMacs       []string
+	networkInterfacesPrivateIPs [][]string
+	networkInterfacesVpcIDs     []string
+
+	ec2      FakeEC2
+	elb      ELB
+	elbv2    ELBV2
+	asg      *FakeASG
+	metadata *FakeMetadata
+	kms      *FakeKMS
+}
+
+// NewFakeAWSServices creates a new FakeAWSServices
+func NewFakeAWSServices(clusterID string) *FakeAWSServices {
+	s := &FakeAWSServices{}
+	s.region = "us-east-1"
+	s.ec2 = &FakeEC2Impl{aws: s}
+	s.elb = &FakeELB{aws: s}
+	s.elbv2 = &FakeELBV2{aws: s}
+	s.asg = &FakeASG{aws: s}
+	s.metadata = &FakeMetadata{aws: s}
+	s.kms = &FakeKMS{aws: s}
+
+	s.networkInterfacesMacs = []string{"aa:bb:cc:dd:ee:00", "aa:bb:cc:dd:ee:01"}
+	s.networkInterfacesVpcIDs = []string{"vpc-mac0", "vpc-mac1"}
+
+	selfInstance := &ec2.Instance{}
+	selfInstance.InstanceId = aws.String("i-self")
+	selfInstance.Placement = &ec2.Placement{
+		AvailabilityZone: aws.String("us-east-1a"),
+	}
+	selfInstance.PrivateDnsName = aws.String("ip-172-20-0-100.ec2.internal")
+	selfInstance.PrivateIpAddress = aws.String("192.168.0.1")
+	selfInstance.PublicIpAddress = aws.String("1.2.3.4")
+	s.selfInstance = selfInstance
+	s.instances = []*ec2.Instance{selfInstance}
+
+	var tag ec2.Tag
+	tag.Key = aws.String(TagNameKubernetesClusterLegacy)
+	tag.Value = aws.String(clusterID)
+	selfInstance.Tags = []*ec2.Tag{&tag}
+
+	return s
+}
+
+// WithAz sets the ec2 placement availability zone
+func (s *FakeAWSServices) WithAz(az string) *FakeAWSServices {
+	if s.selfInstance.Placement == nil {
+		s.selfInstance.Placement = &ec2.Placement{}
+	}
+	s.selfInstance.Placement.AvailabilityZone = aws.String(az)
+	return s
+}
+
+// Compute returns a fake EC2 client
+func (s *FakeAWSServices) Compute(region string) (EC2, error) {
+	return s.ec2, nil
+}
+
+// LoadBalancing returns a fake ELB client
+func (s *FakeAWSServices) LoadBalancing(region string) (ELB, error) {
+	return s.elb, nil
+}
+
+// LoadBalancingV2 returns a fake ELBV2 client
+func (s *FakeAWSServices) LoadBalancingV2(region string) (ELBV2, error) {
+	return s.elbv2, nil
+}
+
+// Autoscaling returns a fake ASG client
+func (s *FakeAWSServices) Autoscaling(region string) (ASG, error) {
+	return s.asg, nil
+}
+
+// Metadata returns a fake EC2Metadata client
+func (s *FakeAWSServices) Metadata() (EC2Metadata, error) {
+	return s.metadata, nil
+}
+
+// KeyManagement returns a fake KMS client
+func (s *FakeAWSServices) KeyManagement(region string) (KMS, error) {
+	return s.kms, nil
+}
+
+// FakeEC2 is a fake EC2 client used for testing
+type FakeEC2 interface {
+	EC2
+	CreateSubnet(*ec2.Subnet) (*ec2.CreateSubnetOutput, error)
+	RemoveSubnets()
+	CreateRouteTable(*ec2.RouteTable) (*ec2.CreateRouteTableOutput, error)
+	RemoveRouteTables()
+}
+
+// FakeEC2Impl is an implementation of the FakeEC2 interface used for testing
+type FakeEC2Impl struct {
+	aws                      *FakeAWSServices
+	Subnets                  []*ec2.Subnet
+	DescribeSubnetsInput     *ec2.DescribeSubnetsInput
+	RouteTables              []*ec2.RouteTable
+	DescribeRouteTablesInput *ec2.DescribeRouteTablesInput
+}
+
+// DescribeInstances returns fake instance descriptions
+func 
(ec2i *FakeEC2Impl) DescribeInstances(request *ec2.DescribeInstancesInput) ([]*ec2.Instance, error) { + matches := []*ec2.Instance{} + for _, instance := range ec2i.aws.instances { + if request.InstanceIds != nil { + if instance.InstanceId == nil { + klog.Warning("Instance with no instance id: ", instance) + continue + } + + found := false + for _, instanceID := range request.InstanceIds { + if *instanceID == *instance.InstanceId { + found = true + break + } + } + if !found { + continue + } + } + if request.Filters != nil { + allMatch := true + for _, filter := range request.Filters { + if !instanceMatchesFilter(instance, filter) { + allMatch = false + break + } + } + if !allMatch { + continue + } + } + matches = append(matches, instance) + } + + return matches, nil +} + +// AttachVolume is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) AttachVolume(request *ec2.AttachVolumeInput) (resp *ec2.VolumeAttachment, err error) { + panic("Not implemented") +} + +// DetachVolume is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) DetachVolume(request *ec2.DetachVolumeInput) (resp *ec2.VolumeAttachment, err error) { + panic("Not implemented") +} + +// DescribeVolumes is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) DescribeVolumes(request *ec2.DescribeVolumesInput) ([]*ec2.Volume, error) { + panic("Not implemented") +} + +// CreateVolume is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) CreateVolume(request *ec2.CreateVolumeInput) (resp *ec2.Volume, err error) { + panic("Not implemented") +} + +// DeleteVolume is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) DeleteVolume(request *ec2.DeleteVolumeInput) (resp *ec2.DeleteVolumeOutput, err error) { + panic("Not implemented") +} + +// DescribeSecurityGroups is not implemented but is required for interface +// conformance +func (ec2i *FakeEC2Impl) DescribeSecurityGroups(request *ec2.DescribeSecurityGroupsInput) ([]*ec2.SecurityGroup, error) { + panic("Not implemented") +} + +// CreateSecurityGroup is not implemented but is required for interface +// conformance +func (ec2i *FakeEC2Impl) CreateSecurityGroup(*ec2.CreateSecurityGroupInput) (*ec2.CreateSecurityGroupOutput, error) { + panic("Not implemented") +} + +// DeleteSecurityGroup is not implemented but is required for interface +// conformance +func (ec2i *FakeEC2Impl) DeleteSecurityGroup(*ec2.DeleteSecurityGroupInput) (*ec2.DeleteSecurityGroupOutput, error) { + panic("Not implemented") +} + +// AuthorizeSecurityGroupIngress is not implemented but is required for +// interface conformance +func (ec2i *FakeEC2Impl) AuthorizeSecurityGroupIngress(*ec2.AuthorizeSecurityGroupIngressInput) (*ec2.AuthorizeSecurityGroupIngressOutput, error) { + panic("Not implemented") +} + +// RevokeSecurityGroupIngress is not implemented but is required for interface +// conformance +func (ec2i *FakeEC2Impl) RevokeSecurityGroupIngress(*ec2.RevokeSecurityGroupIngressInput) (*ec2.RevokeSecurityGroupIngressOutput, error) { + panic("Not implemented") +} + +// DescribeVolumeModifications is not implemented but is required for interface +// conformance +func (ec2i *FakeEC2Impl) DescribeVolumeModifications(*ec2.DescribeVolumesModificationsInput) ([]*ec2.VolumeModification, error) { + panic("Not implemented") +} + +// ModifyVolume is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) 
ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) { + panic("Not implemented") +} + +// CreateSubnet creates fake subnets +func (ec2i *FakeEC2Impl) CreateSubnet(request *ec2.Subnet) (*ec2.CreateSubnetOutput, error) { + ec2i.Subnets = append(ec2i.Subnets, request) + response := &ec2.CreateSubnetOutput{ + Subnet: request, + } + return response, nil +} + +// DescribeSubnets returns fake subnet descriptions +func (ec2i *FakeEC2Impl) DescribeSubnets(request *ec2.DescribeSubnetsInput) ([]*ec2.Subnet, error) { + ec2i.DescribeSubnetsInput = request + return ec2i.Subnets, nil +} + +// RemoveSubnets clears subnets on client +func (ec2i *FakeEC2Impl) RemoveSubnets() { + ec2i.Subnets = ec2i.Subnets[:0] +} + +// CreateTags is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) CreateTags(*ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { + panic("Not implemented") +} + +// DeleteTags is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { + panic("Not implemented") +} + +// DescribeRouteTables returns fake route table descriptions +func (ec2i *FakeEC2Impl) DescribeRouteTables(request *ec2.DescribeRouteTablesInput) ([]*ec2.RouteTable, error) { + ec2i.DescribeRouteTablesInput = request + return ec2i.RouteTables, nil +} + +// CreateRouteTable creates fake route tables +func (ec2i *FakeEC2Impl) CreateRouteTable(request *ec2.RouteTable) (*ec2.CreateRouteTableOutput, error) { + ec2i.RouteTables = append(ec2i.RouteTables, request) + response := &ec2.CreateRouteTableOutput{ + RouteTable: request, + } + return response, nil +} + +// RemoveRouteTables clears route tables on client +func (ec2i *FakeEC2Impl) RemoveRouteTables() { + ec2i.RouteTables = ec2i.RouteTables[:0] +} + +// CreateRoute is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) CreateRoute(request *ec2.CreateRouteInput) (*ec2.CreateRouteOutput, error) { + panic("Not implemented") +} + +// DeleteRoute is not implemented but is required for interface conformance +func (ec2i *FakeEC2Impl) DeleteRoute(request *ec2.DeleteRouteInput) (*ec2.DeleteRouteOutput, error) { + panic("Not implemented") +} + +// ModifyInstanceAttribute is not implemented but is required for interface +// conformance +func (ec2i *FakeEC2Impl) ModifyInstanceAttribute(request *ec2.ModifyInstanceAttributeInput) (*ec2.ModifyInstanceAttributeOutput, error) { + panic("Not implemented") +} + +// DescribeVpcs returns fake VPC descriptions +func (ec2i *FakeEC2Impl) DescribeVpcs(request *ec2.DescribeVpcsInput) (*ec2.DescribeVpcsOutput, error) { + return &ec2.DescribeVpcsOutput{Vpcs: []*ec2.Vpc{{CidrBlock: aws.String("172.20.0.0/16")}}}, nil +} + +// FakeMetadata is a fake EC2 metadata service client used for testing +type FakeMetadata struct { + aws *FakeAWSServices +} + +// GetMetadata returns fake EC2 metadata for testing +func (m *FakeMetadata) GetMetadata(key string) (string, error) { + networkInterfacesPrefix := "network/interfaces/macs/" + i := m.aws.selfInstance + if key == "placement/availability-zone" { + az := "" + if i.Placement != nil { + az = aws.StringValue(i.Placement.AvailabilityZone) + } + return az, nil + } else if key == "instance-id" { + return aws.StringValue(i.InstanceId), nil + } else if key == "local-hostname" { + return aws.StringValue(i.PrivateDnsName), nil + } else if key == "public-hostname" { + return aws.StringValue(i.PublicDnsName), nil + } else if key == "local-ipv4" { + 
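+		// Served from the fake selfInstance configured in NewFakeAWSServices.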
return aws.StringValue(i.PrivateIpAddress), nil + } else if key == "public-ipv4" { + return aws.StringValue(i.PublicIpAddress), nil + } else if strings.HasPrefix(key, networkInterfacesPrefix) { + if key == networkInterfacesPrefix { + // Return the MACs sorted lexically rather than in device-number + // order; this matches AWS's observed behavior and lets us test + // that we fix up the ordering correctly in NodeAddresses(). + macs := make([]string, len(m.aws.networkInterfacesMacs)) + copy(macs, m.aws.networkInterfacesMacs) + sort.Strings(macs) + return strings.Join(macs, "/\n") + "/\n", nil + } + + keySplit := strings.Split(key, "/") + macParam := keySplit[3] + if len(keySplit) == 5 && keySplit[4] == "vpc-id" { + for i, macElem := range m.aws.networkInterfacesMacs { + if macParam == macElem { + return m.aws.networkInterfacesVpcIDs[i], nil + } + } + } + if len(keySplit) == 5 && keySplit[4] == "device-number" { + for i, macElem := range m.aws.networkInterfacesMacs { + if macParam == macElem { + n := i + if n > 0 { + // Introduce an artificial gap, just to test eg: [eth0, eth2] + n++ + } + return fmt.Sprintf("%d\n", n), nil + } + } + } + if len(keySplit) == 5 && keySplit[4] == "local-ipv4s" { + for i, macElem := range m.aws.networkInterfacesMacs { + if macParam == macElem { + return strings.Join(m.aws.networkInterfacesPrivateIPs[i], "/\n"), nil + } + } + } + + return "", nil + } + + return "", nil +} + +// FakeELB is a fake ELB client used for testing +type FakeELB struct { + aws *FakeAWSServices +} + +// CreateLoadBalancer is not implemented but is required for interface +// conformance +func (elb *FakeELB) CreateLoadBalancer(*elb.CreateLoadBalancerInput) (*elb.CreateLoadBalancerOutput, error) { + panic("Not implemented") +} + +// DeleteLoadBalancer is not implemented but is required for interface +// conformance +func (elb *FakeELB) DeleteLoadBalancer(input *elb.DeleteLoadBalancerInput) (*elb.DeleteLoadBalancerOutput, error) { + panic("Not implemented") +} + +// DescribeLoadBalancers is not implemented but is required for interface +// conformance +func (elb *FakeELB) DescribeLoadBalancers(input *elb.DescribeLoadBalancersInput) (*elb.DescribeLoadBalancersOutput, error) { + panic("Not implemented") +} + +// AddTags is not implemented but is required for interface conformance +func (elb *FakeELB) AddTags(input *elb.AddTagsInput) (*elb.AddTagsOutput, error) { + panic("Not implemented") +} + +// RegisterInstancesWithLoadBalancer is not implemented but is required for +// interface conformance +func (elb *FakeELB) RegisterInstancesWithLoadBalancer(*elb.RegisterInstancesWithLoadBalancerInput) (*elb.RegisterInstancesWithLoadBalancerOutput, error) { + panic("Not implemented") +} + +// DeregisterInstancesFromLoadBalancer is not implemented but is required for +// interface conformance +func (elb *FakeELB) DeregisterInstancesFromLoadBalancer(*elb.DeregisterInstancesFromLoadBalancerInput) (*elb.DeregisterInstancesFromLoadBalancerOutput, error) { + panic("Not implemented") +} + +// DetachLoadBalancerFromSubnets is not implemented but is required for +// interface conformance +func (elb *FakeELB) DetachLoadBalancerFromSubnets(*elb.DetachLoadBalancerFromSubnetsInput) (*elb.DetachLoadBalancerFromSubnetsOutput, error) { + panic("Not implemented") +} + +// AttachLoadBalancerToSubnets is not implemented but is required for interface +// conformance +func (elb *FakeELB) AttachLoadBalancerToSubnets(*elb.AttachLoadBalancerToSubnetsInput) (*elb.AttachLoadBalancerToSubnetsOutput, error) { + panic("Not 
implemented") +} + +// CreateLoadBalancerListeners is not implemented but is required for interface +// conformance +func (elb *FakeELB) CreateLoadBalancerListeners(*elb.CreateLoadBalancerListenersInput) (*elb.CreateLoadBalancerListenersOutput, error) { + panic("Not implemented") +} + +// DeleteLoadBalancerListeners is not implemented but is required for interface +// conformance +func (elb *FakeELB) DeleteLoadBalancerListeners(*elb.DeleteLoadBalancerListenersInput) (*elb.DeleteLoadBalancerListenersOutput, error) { + panic("Not implemented") +} + +// ApplySecurityGroupsToLoadBalancer is not implemented but is required for +// interface conformance +func (elb *FakeELB) ApplySecurityGroupsToLoadBalancer(*elb.ApplySecurityGroupsToLoadBalancerInput) (*elb.ApplySecurityGroupsToLoadBalancerOutput, error) { + panic("Not implemented") +} + +// ConfigureHealthCheck is not implemented but is required for interface +// conformance +func (elb *FakeELB) ConfigureHealthCheck(*elb.ConfigureHealthCheckInput) (*elb.ConfigureHealthCheckOutput, error) { + panic("Not implemented") +} + +// CreateLoadBalancerPolicy is not implemented but is required for interface +// conformance +func (elb *FakeELB) CreateLoadBalancerPolicy(*elb.CreateLoadBalancerPolicyInput) (*elb.CreateLoadBalancerPolicyOutput, error) { + panic("Not implemented") +} + +// SetLoadBalancerPoliciesForBackendServer is not implemented but is required +// for interface conformance +func (elb *FakeELB) SetLoadBalancerPoliciesForBackendServer(*elb.SetLoadBalancerPoliciesForBackendServerInput) (*elb.SetLoadBalancerPoliciesForBackendServerOutput, error) { + panic("Not implemented") +} + +// SetLoadBalancerPoliciesOfListener is not implemented but is required for +// interface conformance +func (elb *FakeELB) SetLoadBalancerPoliciesOfListener(input *elb.SetLoadBalancerPoliciesOfListenerInput) (*elb.SetLoadBalancerPoliciesOfListenerOutput, error) { + panic("Not implemented") +} + +// DescribeLoadBalancerPolicies is not implemented but is required for +// interface conformance +func (elb *FakeELB) DescribeLoadBalancerPolicies(input *elb.DescribeLoadBalancerPoliciesInput) (*elb.DescribeLoadBalancerPoliciesOutput, error) { + panic("Not implemented") +} + +// DescribeLoadBalancerAttributes is not implemented but is required for +// interface conformance +func (elb *FakeELB) DescribeLoadBalancerAttributes(*elb.DescribeLoadBalancerAttributesInput) (*elb.DescribeLoadBalancerAttributesOutput, error) { + panic("Not implemented") +} + +// ModifyLoadBalancerAttributes is not implemented but is required for +// interface conformance +func (elb *FakeELB) ModifyLoadBalancerAttributes(*elb.ModifyLoadBalancerAttributesInput) (*elb.ModifyLoadBalancerAttributesOutput, error) { + panic("Not implemented") +} + +// FakeELBV2 is a fake ELBV2 client used for testing +type FakeELBV2 struct { + aws *FakeAWSServices +} + +// AddTags is not implemented but is required for interface conformance +func (elb *FakeELBV2) AddTags(input *elbv2.AddTagsInput) (*elbv2.AddTagsOutput, error) { + panic("Not implemented") +} + +// CreateLoadBalancer is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) CreateLoadBalancer(*elbv2.CreateLoadBalancerInput) (*elbv2.CreateLoadBalancerOutput, error) { + panic("Not implemented") +} + +// DescribeLoadBalancers is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DescribeLoadBalancers(*elbv2.DescribeLoadBalancersInput) (*elbv2.DescribeLoadBalancersOutput, error) { + panic("Not 
implemented") +} + +// DeleteLoadBalancer is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DeleteLoadBalancer(*elbv2.DeleteLoadBalancerInput) (*elbv2.DeleteLoadBalancerOutput, error) { + panic("Not implemented") +} + +// ModifyLoadBalancerAttributes is not implemented but is required for +// interface conformance +func (elb *FakeELBV2) ModifyLoadBalancerAttributes(*elbv2.ModifyLoadBalancerAttributesInput) (*elbv2.ModifyLoadBalancerAttributesOutput, error) { + panic("Not implemented") +} + +// DescribeLoadBalancerAttributes is not implemented but is required for +// interface conformance +func (elb *FakeELBV2) DescribeLoadBalancerAttributes(*elbv2.DescribeLoadBalancerAttributesInput) (*elbv2.DescribeLoadBalancerAttributesOutput, error) { + panic("Not implemented") +} + +// CreateTargetGroup is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) CreateTargetGroup(*elbv2.CreateTargetGroupInput) (*elbv2.CreateTargetGroupOutput, error) { + panic("Not implemented") +} + +// DescribeTargetGroups is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DescribeTargetGroups(*elbv2.DescribeTargetGroupsInput) (*elbv2.DescribeTargetGroupsOutput, error) { + panic("Not implemented") +} + +// ModifyTargetGroup is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) ModifyTargetGroup(*elbv2.ModifyTargetGroupInput) (*elbv2.ModifyTargetGroupOutput, error) { + panic("Not implemented") +} + +// DeleteTargetGroup is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DeleteTargetGroup(*elbv2.DeleteTargetGroupInput) (*elbv2.DeleteTargetGroupOutput, error) { + panic("Not implemented") +} + +// DescribeTargetHealth is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DescribeTargetHealth(input *elbv2.DescribeTargetHealthInput) (*elbv2.DescribeTargetHealthOutput, error) { + panic("Not implemented") +} + +// DescribeTargetGroupAttributes is not implemented but is required for +// interface conformance +func (elb *FakeELBV2) DescribeTargetGroupAttributes(*elbv2.DescribeTargetGroupAttributesInput) (*elbv2.DescribeTargetGroupAttributesOutput, error) { + panic("Not implemented") +} + +// ModifyTargetGroupAttributes is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) ModifyTargetGroupAttributes(*elbv2.ModifyTargetGroupAttributesInput) (*elbv2.ModifyTargetGroupAttributesOutput, error) { + panic("Not implemented") +} + +// RegisterTargets is not implemented but is required for interface conformance +func (elb *FakeELBV2) RegisterTargets(*elbv2.RegisterTargetsInput) (*elbv2.RegisterTargetsOutput, error) { + panic("Not implemented") +} + +// DeregisterTargets is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DeregisterTargets(*elbv2.DeregisterTargetsInput) (*elbv2.DeregisterTargetsOutput, error) { + panic("Not implemented") +} + +// CreateListener is not implemented but is required for interface conformance +func (elb *FakeELBV2) CreateListener(*elbv2.CreateListenerInput) (*elbv2.CreateListenerOutput, error) { + panic("Not implemented") +} + +// DescribeListeners is not implemented but is required for interface +// conformance +func (elb *FakeELBV2) DescribeListeners(*elbv2.DescribeListenersInput) (*elbv2.DescribeListenersOutput, error) { + panic("Not implemented") +} + +// DeleteListener is not implemented but is required for interface conformance 
+func (elb *FakeELBV2) DeleteListener(*elbv2.DeleteListenerInput) (*elbv2.DeleteListenerOutput, error) { + panic("Not implemented") +} + +// ModifyListener is not implemented but is required for interface conformance +func (elb *FakeELBV2) ModifyListener(*elbv2.ModifyListenerInput) (*elbv2.ModifyListenerOutput, error) { + panic("Not implemented") +} + +// WaitUntilLoadBalancersDeleted is not implemented but is required for +// interface conformance +func (elb *FakeELBV2) WaitUntilLoadBalancersDeleted(*elbv2.DescribeLoadBalancersInput) error { + panic("Not implemented") +} + +// FakeASG is a fake Autoscaling client used for testing +type FakeASG struct { + aws *FakeAWSServices +} + +// UpdateAutoScalingGroup is not implemented but is required for interface +// conformance +func (a *FakeASG) UpdateAutoScalingGroup(*autoscaling.UpdateAutoScalingGroupInput) (*autoscaling.UpdateAutoScalingGroupOutput, error) { + panic("Not implemented") +} + +// DescribeAutoScalingGroups is not implemented but is required for interface +// conformance +func (a *FakeASG) DescribeAutoScalingGroups(*autoscaling.DescribeAutoScalingGroupsInput) (*autoscaling.DescribeAutoScalingGroupsOutput, error) { + panic("Not implemented") +} + +// FakeKMS is a fake KMS client used for testing +type FakeKMS struct { + aws *FakeAWSServices +} + +// DescribeKey is not implemented but is required for interface conformance +func (kms *FakeKMS) DescribeKey(*kms.DescribeKeyInput) (*kms.DescribeKeyOutput, error) { + panic("Not implemented") +} + +func instanceMatchesFilter(instance *ec2.Instance, filter *ec2.Filter) bool { + name := *filter.Name + if name == "private-dns-name" { + if instance.PrivateDnsName == nil { + return false + } + return contains(filter.Values, *instance.PrivateDnsName) + } + + if name == "instance-state-name" { + return contains(filter.Values, *instance.State.Name) + } + + if name == "tag-key" { + for _, instanceTag := range instance.Tags { + if contains(filter.Values, aws.StringValue(instanceTag.Key)) { + return true + } + } + return false + } + + if strings.HasPrefix(name, "tag:") { + tagName := name[4:] + for _, instanceTag := range instance.Tags { + if aws.StringValue(instanceTag.Key) == tagName && contains(filter.Values, aws.StringValue(instanceTag.Value)) { + return true + } + } + return false + } + + panic("Unknown filter name: " + name) +} + +func contains(haystack []*string, needle string) bool { + for _, s := range haystack { + // (deliberately panic if s == nil) + if needle == *s { + return true + } + } + return false +} + +// DescribeNetworkInterfaces returns list of ENIs for testing +func (ec2i *FakeEC2Impl) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) { + networkInterface := []*ec2.NetworkInterface{ + { + PrivateIpAddress: aws.String("1.2.3.4"), + AvailabilityZone: aws.String("us-west-2c"), + }, + } + for _, filter := range input.Filters { + if strings.HasPrefix(*filter.Values[0], fargateNodeNamePrefix) { + // verify filter doesn't have fargate prefix + panic(fmt.Sprintf("invalid endpoint specified for DescribeNetworkInterface call %s", *filter.Values[0])) + } else if strings.HasPrefix(*filter.Values[0], "not-found") { + // for negative testing + return &ec2.DescribeNetworkInterfacesOutput{}, nil + } + + if *filter.Name == "private-dns-name" { + networkInterface[0].PrivateDnsName = aws.String("ip-1-2-3-4.compute.amazon.com") + } + } + + return &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: networkInterface, + }, nil +} 
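
Note: a minimal sketch of how these fakes are typically wired together,
mirroring the unit tests added later in this series (which construct the
cloud via the exported NewFakeAWSServices and NewAWSCloud helpers):

	awsServices := awsv1.NewFakeAWSServices("clusterid.test")
	awsCloud, err := awsv1.NewAWSCloud(awsv1.CloudConfig{}, awsServices)
	if err != nil {
		// t is the *testing.T of the enclosing test
		t.Fatalf("error building AWS cloud: %v", err)
	}
	// awsCloud now resolves its EC2/ELB/ELBV2/ASG/KMS clients to the fakes
	// above, so e.g. DescribeInstances answers from the canned selfInstance.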
diff --git a/pkg/providers/v1/tags.go b/pkg/providers/v1/tags.go new file mode 100644 index 0000000000..bc66436c12 --- /dev/null +++ b/pkg/providers/v1/tags.go @@ -0,0 +1,357 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package aws + +import ( + "fmt" + "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "k8s.io/klog/v2" + + "k8s.io/apimachinery/pkg/util/wait" +) + +// TagNameKubernetesClusterPrefix is the tag name we use to differentiate multiple +// logically independent clusters running in the same AZ. +// The tag key = TagNameKubernetesClusterPrefix + clusterID +// The tag value is an ownership value +const TagNameKubernetesClusterPrefix = "kubernetes.io/cluster/" + +// TagNameKubernetesClusterLegacy is the legacy tag name we use to differentiate multiple +// logically independent clusters running in the same AZ. The problem with it was that it +// did not allow shared resources. +const TagNameKubernetesClusterLegacy = "KubernetesCluster" + +// ResourceLifecycle is the cluster lifecycle state used in tagging +type ResourceLifecycle string + +const ( + // ResourceLifecycleOwned is the value we use when tagging resources to indicate + // that the resource is considered owned and managed by the cluster, + // and in particular that the lifecycle is tied to the lifecycle of the cluster. + ResourceLifecycleOwned = "owned" + // ResourceLifecycleShared is the value we use when tagging resources to indicate + // that the resource is shared between multiple clusters, and should not be destroyed + // if the cluster is destroyed. + ResourceLifecycleShared = "shared" +) + +type awsTagging struct { + // ClusterID is our cluster identifier: we tag AWS resources with this value, + // and thus we can run two independent clusters in the same VPC or subnets. + // This gives us similar functionality to GCE projects. 
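+	// For example, a cluster with ID "mycluster" marks the resources it
+	// manages with the tag key "kubernetes.io/cluster/mycluster" and the
+	// ownership value "owned" (or "shared" for resources that may outlive it).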
+	ClusterID string
+
+	// usesLegacyTags is true if we are using the legacy TagNameKubernetesClusterLegacy tags
+	usesLegacyTags bool
+}
+
+func (t *awsTagging) init(legacyClusterID string, clusterID string) error {
+	if legacyClusterID != "" {
+		if clusterID != "" && legacyClusterID != clusterID {
+			return fmt.Errorf("clusterID tags did not match: %q vs %q", clusterID, legacyClusterID)
+		}
+		t.usesLegacyTags = true
+		clusterID = legacyClusterID
+	}
+
+	t.ClusterID = clusterID
+
+	if clusterID != "" {
+		klog.Infof("AWS cloud filtering on ClusterID: %v", clusterID)
+	} else {
+		return fmt.Errorf("AWS cloud failed to find ClusterID")
+	}
+
+	return nil
+}
+
+// initFromTags extracts a clusterID from the given tags, if one is present,
+// and initializes the tagging state from it. If multiple (different)
+// clusterIDs are found, it returns an error; if none is found,
+// initialization fails.
+func (t *awsTagging) initFromTags(tags []*ec2.Tag) error {
+	legacyClusterID, newClusterID, err := findClusterIDs(tags)
+	if err != nil {
+		return err
+	}
+
+	if legacyClusterID == "" && newClusterID == "" {
+		klog.Errorf("Neither tag %q nor a tag with prefix %q was found; Kubernetes may behave unexpectedly.", TagNameKubernetesClusterLegacy, TagNameKubernetesClusterPrefix)
+	}
+
+	return t.init(legacyClusterID, newClusterID)
+}
+
+// Extracts the legacy & new cluster ids from the given tags, if they are present
+// If duplicate tags are found, returns an error
+func findClusterIDs(tags []*ec2.Tag) (string, string, error) {
+	legacyClusterID := ""
+	newClusterID := ""
+
+	for _, tag := range tags {
+		tagKey := aws.StringValue(tag.Key)
+		if strings.HasPrefix(tagKey, TagNameKubernetesClusterPrefix) {
+			id := strings.TrimPrefix(tagKey, TagNameKubernetesClusterPrefix)
+			if newClusterID != "" {
+				return "", "", fmt.Errorf("found multiple cluster tags with prefix %s (%q and %q)", TagNameKubernetesClusterPrefix, newClusterID, id)
+			}
+			newClusterID = id
+		}
+
+		if tagKey == TagNameKubernetesClusterLegacy {
+			id := aws.StringValue(tag.Value)
+			if legacyClusterID != "" {
+				return "", "", fmt.Errorf("found multiple %s tags (%q and %q)", TagNameKubernetesClusterLegacy, legacyClusterID, id)
+			}
+			legacyClusterID = id
+		}
+	}
+
+	return legacyClusterID, newClusterID, nil
+}
+
+func (t *awsTagging) clusterTagKey() string {
+	return TagNameKubernetesClusterPrefix + t.ClusterID
+}
+
+func (t *awsTagging) hasClusterTag(tags []*ec2.Tag) bool {
+	// if the clusterID is not configured -- we consider all instances.
+	if len(t.ClusterID) == 0 {
+		return true
+	}
+	clusterTagKey := t.clusterTagKey()
+	for _, tag := range tags {
+		tagKey := aws.StringValue(tag.Key)
+		// For 1.6, we continue to recognize the legacy tags, for the 1.5 -> 1.6 upgrade
+		// Note that we want to keep traversing the tag list even if we see a legacy tag with value != ClusterID
+		if (tagKey == TagNameKubernetesClusterLegacy) && (aws.StringValue(tag.Value) == t.ClusterID) {
+			return true
+		}
+		if tagKey == clusterTagKey {
+			return true
+		}
+	}
+	return false
+}
+
+func (t *awsTagging) hasNoClusterPrefixTag(tags []*ec2.Tag) bool {
+	for _, tag := range tags {
+		if strings.HasPrefix(aws.StringValue(tag.Key), TagNameKubernetesClusterPrefix) {
+			return false
+		}
+	}
+	return true
+}
+
+// readRepairClusterTags ensures that a resource has the correct cluster tags.
+// If it has none of the expected tags, we assume the resource was created but
+// an error occurred between creation and tagging, and we add them. If it
+// carries a different cluster's tags, that is an error.
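+// For example, if the expected tag "kubernetes.io/cluster/mycluster"="owned"
+// is missing it is re-added, but if the key is present with a different value
+// the call fails rather than silently re-claiming the resource.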
+func (t *awsTagging) readRepairClusterTags(client EC2, resourceID string, lifecycle ResourceLifecycle, additionalTags map[string]string, observedTags []*ec2.Tag) error { + actualTagMap := make(map[string]string) + for _, tag := range observedTags { + actualTagMap[aws.StringValue(tag.Key)] = aws.StringValue(tag.Value) + } + + expectedTags := t.buildTags(lifecycle, additionalTags) + + addTags := make(map[string]string) + for k, expected := range expectedTags { + actual := actualTagMap[k] + if actual == expected { + continue + } + if actual == "" { + klog.Warningf("Resource %q was missing expected cluster tag %q. Will add (with value %q)", resourceID, k, expected) + addTags[k] = expected + } else { + return fmt.Errorf("resource %q has tag belonging to another cluster: %q=%q (expected %q)", resourceID, k, actual, expected) + } + } + + if len(addTags) == 0 { + return nil + } + + if err := t.createTags(client, resourceID, lifecycle, addTags); err != nil { + return fmt.Errorf("error adding missing tags to resource %q: %q", resourceID, err) + } + + return nil +} + +// createTags calls EC2 CreateTags, but adds retry-on-failure logic +// We retry mainly because if we create an object, we cannot tag it until it is "fully created" (eventual consistency) +// The error code varies though (depending on what we are tagging), so we simply retry on all errors +func (t *awsTagging) createTags(client EC2, resourceID string, lifecycle ResourceLifecycle, additionalTags map[string]string) error { + tags := t.buildTags(lifecycle, additionalTags) + + if tags == nil || len(tags) == 0 { + return nil + } + + var awsTags []*ec2.Tag + for k, v := range tags { + tag := &ec2.Tag{ + Key: aws.String(k), + Value: aws.String(v), + } + awsTags = append(awsTags, tag) + } + + backoff := wait.Backoff{ + Duration: createTagInitialDelay, + Factor: createTagFactor, + Steps: createTagSteps, + } + request := &ec2.CreateTagsInput{} + request.Resources = []*string{&resourceID} + request.Tags = awsTags + + var lastErr error + err := wait.ExponentialBackoff(backoff, func() (bool, error) { + _, err := client.CreateTags(request) + if err == nil { + return true, nil + } + + // We could check that the error is retryable, but the error code changes based on what we are tagging + // SecurityGroup: InvalidGroup.NotFound + klog.V(2).Infof("Failed to create tags; will retry. Error was %q", err) + lastErr = err + return false, nil + }) + if err == wait.ErrWaitTimeout { + // return real CreateTags error instead of timeout + err = lastErr + } + return err +} + +// Add additional filters, to match on our tags +// This lets us run multiple k8s clusters in a single EC2 AZ +func (t *awsTagging) addFilters(filters []*ec2.Filter) []*ec2.Filter { + // if there are no clusterID configured - no filtering by special tag names + // should be applied to revert to legacy behaviour. + if len(t.ClusterID) == 0 { + if len(filters) == 0 { + // We can't pass a zero-length Filters to AWS (it's an error) + // So if we end up with no filters; just return nil + return nil + } + return filters + } + + f := newEc2Filter("tag-key", t.clusterTagKey()) + filters = append(filters, f) + return filters +} + +// Add additional filters, to match on our tags. 
This uses the tag for legacy +// 1.5 -> 1.6 clusters and exists for backwards compatibility +// +// This lets us run multiple k8s clusters in a single EC2 AZ +func (t *awsTagging) addLegacyFilters(filters []*ec2.Filter) []*ec2.Filter { + // if there are no clusterID configured - no filtering by special tag names + // should be applied to revert to legacy behaviour. + if len(t.ClusterID) == 0 { + if len(filters) == 0 { + // We can't pass a zero-length Filters to AWS (it's an error) + // So if we end up with no filters; just return nil + return nil + } + return filters + } + + f := newEc2Filter(fmt.Sprintf("tag:%s", TagNameKubernetesClusterLegacy), t.ClusterID) + + // We can't pass a zero-length Filters to AWS (it's an error) + // So if we end up with no filters; we need to return nil + filters = append(filters, f) + return filters +} + +func (t *awsTagging) buildTags(lifecycle ResourceLifecycle, additionalTags map[string]string) map[string]string { + tags := make(map[string]string) + for k, v := range additionalTags { + tags[k] = v + } + + // no clusterID is a sign of misconfigured cluster, but we can't be tagging the resources with empty + // strings + if len(t.ClusterID) == 0 { + return tags + } + + // We only create legacy tags if we are using legacy tags, i.e. if we have seen a legacy tag on our instance + if t.usesLegacyTags { + tags[TagNameKubernetesClusterLegacy] = t.ClusterID + } + tags[t.clusterTagKey()] = string(lifecycle) + + return tags +} + +func (t *awsTagging) clusterID() string { + return t.ClusterID +} + +func (c *Cloud) TagResource(resourceId string, tags map[string]string) error { + request := &ec2.CreateTagsInput{ + Resources: []*string{aws.String(resourceId)}, + Tags: buildAwsTags(tags), + } + + _, err := c.ec2.CreateTags(request) + + if err != nil { + klog.Errorf("Error occurred trying to tag resources, %v", err) + return err + } + + return nil +} + +func (c *Cloud) UntagResource(resourceId string, tags map[string]string) error { + request := &ec2.DeleteTagsInput{ + Resources: []*string{aws.String(resourceId)}, + Tags: buildAwsTags(tags), + } + + _, err := c.ec2.DeleteTags(request) + + if err != nil { + klog.Errorf("Error occurred trying to untag resources, %v", err) + return err + } + + return nil +} + +func buildAwsTags(tags map[string]string) []*ec2.Tag { + var awsTags []*ec2.Tag + for k, v := range tags { + newTag := &ec2.Tag{ + Key: aws.String(k), + Value: aws.String(v), + } + awsTags = append(awsTags, newTag) + } + + return awsTags +} From e0aa7606de84bde3c16e042f4301f78f251a54e2 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 22 Mar 2022 15:24:13 -0700 Subject: [PATCH 15/40] Disabled the tagging controller by default --- cmd/aws-cloud-controller-manager/main.go | 15 +++++++++++++++ pkg/controllers/options/tagging_controller.go | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cmd/aws-cloud-controller-manager/main.go b/cmd/aws-cloud-controller-manager/main.go index ab8dd6f0f0..2132638e08 100644 --- a/cmd/aws-cloud-controller-manager/main.go +++ b/cmd/aws-cloud-controller-manager/main.go @@ -50,6 +50,21 @@ import ( "k8s.io/kubernetes/cmd/cloud-controller-manager/app/options" "k8s.io/kubernetes/pkg/features" // add the kubernetes feature gates netutils "k8s.io/utils/net" + "k8s.io/apimachinery/pkg/util/wait" + cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider-aws/pkg/controllers/tagging" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" + awsv2 
"k8s.io/cloud-provider-aws/pkg/providers/v2" + "k8s.io/cloud-provider/app" + "k8s.io/cloud-provider/options" + cliflag "k8s.io/component-base/cli/flag" + "k8s.io/component-base/logs" + _ "k8s.io/component-base/metrics/prometheus/clientgo" // for client metric registration + _ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration + "k8s.io/klog/v2" + "math/rand" + "os" + "time" cloudprovider "k8s.io/cloud-provider" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go index e921c3519a..be217696db 100644 --- a/pkg/controllers/options/tagging_controller.go +++ b/pkg/controllers/options/tagging_controller.go @@ -15,7 +15,7 @@ func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) { func (o *TaggingControllerOptions) Validate() error { if len(o.Tags) == 0 { - return fmt.Errorf("--tags must not be empty.") + return fmt.Errorf("--tags must not be empty") } return nil From df66d0ce25a73ff272908d2c0cfc7f986533cd74 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 22 Mar 2022 19:28:13 -0700 Subject: [PATCH 16/40] Updated test structure --- pkg/controllers/tagging/tagging_controller.go | 1 + .../tagging/tagging_controller_test.go | 37 +++++++++++++------ pkg/providers/v1/aws.go | 4 ++ pkg/providers/v1/aws_fakes.go | 6 +-- pkg/providers/v1/tags.go | 4 +- 5 files changed, 36 insertions(+), 16 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 84cdd6646b..fbc8af1674 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -109,6 +109,7 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { } tc.nodeMap[node.GetName()] = node + tc.taggedNodes[node.GetName()] = true } tc.tagNodesResources(nodesToTag) diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 740b9227e5..807f82475c 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -21,16 +21,17 @@ import ( coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/record" - fakecloud "k8s.io/cloud-provider/fake" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" "testing" "time" ) +const TestClusterID = "clusterid.test" + func Test_NodesJoining(t *testing.T) { testcases := []struct { name string - fakeCloud *fakecloud.Cloud currNode *v1.Node taggingController TaggingController noOfNodes int @@ -42,9 +43,9 @@ func Test_NodesJoining(t *testing.T) { Name: "node0", CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, - }, - fakeCloud: &fakecloud.Cloud{ - ExistsByProviderID: false, + Spec: v1.NodeSpec{ + ProviderID: "i-00000", + }, }, taggingController: TaggingController{ taggedNodes: make(map[string]bool), @@ -59,9 +60,9 @@ func Test_NodesJoining(t *testing.T) { Name: "node1", CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, - }, - fakeCloud: &fakecloud.Cloud{ - ExistsByProviderID: false, + Spec: v1.NodeSpec{ + ProviderID: "i-00001", + }, }, taggingController: TaggingController{ taggedNodes: map[string]bool{ @@ -73,6 +74,9 @@ func Test_NodesJoining(t *testing.T) { Name: "node0", CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, + Spec: v1.NodeSpec{ + ProviderID: 
"i-00000", + }, }, }, }, @@ -85,9 +89,9 @@ func Test_NodesJoining(t *testing.T) { Name: "node2", CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, - }, - fakeCloud: &fakecloud.Cloud{ - ExistsByProviderID: false, + Spec: v1.NodeSpec{ + ProviderID: "i-00002", + }, }, taggingController: TaggingController{ taggedNodes: map[string]bool{ @@ -100,12 +104,18 @@ func Test_NodesJoining(t *testing.T) { Name: "node0", CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, + Spec: v1.NodeSpec{ + ProviderID: "i-00000", + }, }, "node1": { ObjectMeta: metav1.ObjectMeta{ Name: "node1", CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, + Spec: v1.NodeSpec{ + ProviderID: "i-00001", + }, }, }, }, @@ -113,6 +123,9 @@ func Test_NodesJoining(t *testing.T) { }, } + awsServices := awsv1.NewFakeAWSServices(TestClusterID) + fakeAws, _ := awsv1.NewAWSCloud(awsv1.CloudConfig{}, awsServices) + for _, testcase := range testcases { t.Run(testcase.name, func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) @@ -128,7 +141,7 @@ func Test_NodesJoining(t *testing.T) { eventBroadcaster := record.NewBroadcaster() testcase.taggingController.nodeLister = nodeInformer.Lister() testcase.taggingController.kubeClient = clientset - testcase.taggingController.cloud = testcase.fakeCloud + testcase.taggingController.cloud = fakeAws testcase.taggingController.nodeMonitorPeriod = 1 * time.Second w := eventBroadcaster.StartLogging(klog.Infof) diff --git a/pkg/providers/v1/aws.go b/pkg/providers/v1/aws.go index e1fcdb5de6..280b678c79 100644 --- a/pkg/providers/v1/aws.go +++ b/pkg/providers/v1/aws.go @@ -1417,6 +1417,10 @@ func newAWSCloud(cfg CloudConfig, awsServices Services) (*Cloud, error) { return awsCloud, nil } +func NewAWSCloud(cfg CloudConfig, awsServices Services) (*Cloud, error) { + return newAWSCloud(cfg, awsServices) +} + // isRegionValid accepts an AWS region name and returns if the region is a // valid region known to the AWS SDK. 
Considers the region returned from the // EC2 metadata service to be a valid region as it's only available on a host diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go index 48eb4e942a..024d1f94e7 100644 --- a/pkg/providers/v1/aws_fakes.go +++ b/pkg/providers/v1/aws_fakes.go @@ -263,13 +263,13 @@ func (ec2i *FakeEC2Impl) RemoveSubnets() { } // CreateTags is not implemented but is required for interface conformance -func (ec2i *FakeEC2Impl) CreateTags(*ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { - panic("Not implemented") +func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { + return &ec2.CreateTagsOutput{}, nil } // DeleteTags is not implemented but is required for interface conformance func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { - panic("Not implemented") + return &ec2.DeleteTagsOutput{}, nil } // DescribeRouteTables returns fake route table descriptions diff --git a/pkg/providers/v1/tags.go b/pkg/providers/v1/tags.go index bc66436c12..668275e73e 100644 --- a/pkg/providers/v1/tags.go +++ b/pkg/providers/v1/tags.go @@ -317,8 +317,10 @@ func (c *Cloud) TagResource(resourceId string, tags map[string]string) error { Tags: buildAwsTags(tags), } - _, err := c.ec2.CreateTags(request) + res, err := c.ec2.CreateTags(request) + klog.Infof("NGUYEN: %v", res) + if err != nil { klog.Errorf("Error occurred trying to tag resources, %v", err) return err From a8efac1a547d7b95df7543426c880bf8a22f502f Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 22 Mar 2022 19:56:23 -0700 Subject: [PATCH 17/40] Making the tests more robust --- .../tagging/tagging_controller_test.go | 72 ++++++++++++++++--- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 807f82475c..186ecb9c65 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -29,12 +29,13 @@ import ( const TestClusterID = "clusterid.test" -func Test_NodesJoining(t *testing.T) { +func Test_NodesJoiningAndLeaving(t *testing.T) { testcases := []struct { name string currNode *v1.Node taggingController TaggingController - noOfNodes int + noOfCurrNodes int + totalNodes int }{ { name: "node0 joins the cluster.", @@ -51,7 +52,8 @@ func Test_NodesJoining(t *testing.T) { taggedNodes: make(map[string]bool), nodeMap: make(map[string]*v1.Node), }, - noOfNodes: 1, + noOfCurrNodes: 1, + totalNodes: 1, }, { name: "node1 joins the cluster, node0 left.", @@ -80,7 +82,8 @@ func Test_NodesJoining(t *testing.T) { }, }, }, - noOfNodes: 1, + noOfCurrNodes: 1, + totalNodes: 2, }, { name: "node2 joins the cluster, node0 and node1 left.", @@ -119,7 +122,58 @@ func Test_NodesJoining(t *testing.T) { }, }, }, - noOfNodes: 1, + noOfCurrNodes: 1, + totalNodes: 3, + }, + { + name: "no new node joins the cluster.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-00002", + }, + }, + taggingController: TaggingController{ + taggedNodes: map[string]bool{ + "node0": true, + "node1": true, + "node2": true, + }, + nodeMap: map[string]*v1.Node{ + "node0": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + 
ProviderID: "i-00000", + }, + }, + "node1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-00001", + }, + }, + "node2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-00002", + }, + }, + }, + }, + noOfCurrNodes: 1, + totalNodes: 3, }, } @@ -147,14 +201,10 @@ func Test_NodesJoining(t *testing.T) { w := eventBroadcaster.StartLogging(klog.Infof) defer w.Stop() - nodeCountBeforeTagging := len(testcase.taggingController.nodeMap) testcase.taggingController.MonitorNodes(ctx) - klog.Infof("testcase.taggingController.taggedNodes %s", testcase.taggingController.taggedNodes) - klog.Errorf("testcase.taggingController.nodeMap %s", testcase.taggingController.nodeMap) - - if len(testcase.taggingController.taggedNodes) != testcase.noOfNodes || len(testcase.taggingController.nodeMap) != nodeCountBeforeTagging+testcase.noOfNodes { - t.Errorf("taggedNodes must contain %d element(s), and nodeMap must contain %d element(s).", testcase.noOfNodes, nodeCountBeforeTagging+testcase.noOfNodes) + if len(testcase.taggingController.taggedNodes) != testcase.noOfCurrNodes || len(testcase.taggingController.nodeMap) != testcase.totalNodes { + t.Errorf("taggedNodes must contain %d element(s), and nodeMap must contain %d element(s).", testcase.noOfCurrNodes, testcase.totalNodes) } }) } From bbcf7bcbc7ba6d53106ca647e1da0da720962072 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Mar 2022 10:04:13 -0700 Subject: [PATCH 18/40] Renaming the maps in tagging controller --- pkg/controllers/tagging/tagging_controller.go | 28 +++++++++---------- .../tagging/tagging_controller_test.go | 20 ++++++------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index fbc8af1674..3b98f89d45 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -45,10 +45,10 @@ type TaggingController struct { nodeMonitorPeriod time.Duration // A map presenting the node and whether it currently exists - taggedNodes map[string]bool + currNodes map[string]bool - // A map representing nodes that were part of the cluster at any point in time - nodeMap map[string]*v1.Node + // A map representing nodes that were ever part of the cluster + totalNodes map[string]*v1.Node // Representing the user input for tags tags map[string]string @@ -76,8 +76,8 @@ func NewTaggingController( nodeLister: nodeInformer.Lister(), cloud: awsCloud, nodeMonitorPeriod: nodeMonitorPeriod, - taggedNodes: make(map[string]bool), - nodeMap: make(map[string]*v1.Node), + currNodes: make(map[string]bool), + totalNodes: make(map[string]*v1.Node), tags: tags, } return tc, nil @@ -98,25 +98,25 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { return } - for k := range tc.taggedNodes { - tc.taggedNodes[k] = false + for k := range tc.currNodes { + tc.currNodes[k] = false } var nodesToTag []*v1.Node for _, node := range nodes { - if _, ok := tc.taggedNodes[node.GetName()]; !ok { + if _, ok := tc.currNodes[node.GetName()]; !ok { nodesToTag = append(nodesToTag, node) } - tc.nodeMap[node.GetName()] = node - tc.taggedNodes[node.GetName()] = true + tc.totalNodes[node.GetName()] = node + tc.currNodes[node.GetName()] = true } 
tc.tagNodesResources(nodesToTag) var nodesToUntag []*v1.Node - for nodeName, existed := range tc.taggedNodes { + for nodeName, existed := range tc.currNodes { if existed == false { - nodesToUntag = append(nodesToUntag, tc.nodeMap[nodeName]) + nodesToUntag = append(nodesToUntag, tc.totalNodes[nodeName]) } } tc.untagNodeResources(nodesToUntag) @@ -132,7 +132,7 @@ func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { if !nodeTagged { // Node tagged unsuccessfully, remove from the map // so that we can try later if it still exists - delete(tc.taggedNodes, node.GetName()) + delete(tc.currNodes, node.GetName()) } } } @@ -165,7 +165,7 @@ func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { nodeUntagged = tc.untagEc2Instance(node) if nodeUntagged { - delete(tc.taggedNodes, node.GetName()) + delete(tc.currNodes, node.GetName()) } } } diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 186ecb9c65..f5f771f0b7 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -49,8 +49,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - taggedNodes: make(map[string]bool), - nodeMap: make(map[string]*v1.Node), + currNodes: make(map[string]bool), + totalNodes: make(map[string]*v1.Node), }, noOfCurrNodes: 1, totalNodes: 1, @@ -67,10 +67,10 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - taggedNodes: map[string]bool{ + currNodes: map[string]bool{ "node0": true, }, - nodeMap: map[string]*v1.Node{ + totalNodes: map[string]*v1.Node{ "node0": { ObjectMeta: metav1.ObjectMeta{ Name: "node0", @@ -97,11 +97,11 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - taggedNodes: map[string]bool{ + currNodes: map[string]bool{ "node0": true, "node1": true, }, - nodeMap: map[string]*v1.Node{ + totalNodes: map[string]*v1.Node{ "node0": { ObjectMeta: metav1.ObjectMeta{ Name: "node0", @@ -137,12 +137,12 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - taggedNodes: map[string]bool{ + currNodes: map[string]bool{ "node0": true, "node1": true, "node2": true, }, - nodeMap: map[string]*v1.Node{ + totalNodes: map[string]*v1.Node{ "node0": { ObjectMeta: metav1.ObjectMeta{ Name: "node0", @@ -203,8 +203,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { testcase.taggingController.MonitorNodes(ctx) - if len(testcase.taggingController.taggedNodes) != testcase.noOfCurrNodes || len(testcase.taggingController.nodeMap) != testcase.totalNodes { - t.Errorf("taggedNodes must contain %d element(s), and nodeMap must contain %d element(s).", testcase.noOfCurrNodes, testcase.totalNodes) + if len(testcase.taggingController.currNodes) != testcase.noOfCurrNodes || len(testcase.taggingController.totalNodes) != testcase.totalNodes { + t.Errorf("currNodes must contain %d element(s), and totalNodes must contain %d element(s).", testcase.noOfCurrNodes, testcase.totalNodes) } }) } From 614c94dab78b0ac2d4a846335a2f00695c70e726 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Mar 2022 10:33:04 -0700 Subject: [PATCH 19/40] Refactoring names and remove debugging logs --- pkg/controllers/tagging/tagging_controller.go | 17 ++++++++--------- pkg/providers/v1/tags.go | 4 +--- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git 
a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 3b98f89d45..0f28524ca5 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -83,8 +83,8 @@ func NewTaggingController( return tc, nil } -// Run will start the controller to tag resources attached to a cluster -// and untag resources detached from a cluster. +// Run will start the controller to tag resources attached to the cluster +// and untag resources detached from the cluster. func (tc *TaggingController) Run(ctx context.Context) { defer utilruntime.HandleCrash() @@ -94,8 +94,7 @@ func (tc *TaggingController) Run(ctx context.Context) { func (tc *TaggingController) MonitorNodes(ctx context.Context) { nodes, err := tc.nodeLister.List(labels.Everything()) if err != nil { - klog.Errorf("error listing nodes: %s", err) - return + klog.Fatalf("error listing nodes: %s", err) } for k := range tc.currNodes { @@ -122,7 +121,7 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { tc.untagNodeResources(nodesToUntag) } -// tagNodesResources tag node resources from a list of node +// tagNodesResources tag node resources from a list of nodes // If we want to tag more resources, modify this function appropriately func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { for _, node := range nodes { @@ -130,15 +129,15 @@ func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { nodeTagged = tc.tagEc2Instances(node) if !nodeTagged { - // Node tagged unsuccessfully, remove from the map + // Node tagged unsuccessfully, remove from currNodes // so that we can try later if it still exists delete(tc.currNodes, node.GetName()) } } } -// tagEc2Instances applies the provided tags to each EC2 instances in -// the cluster. Return if a node is tagged or not +// tagEc2Instances applies the provided tags to each EC2 instance in +// the cluster. 
Return a boolean value representing if a node is tagged or not func (tc *TaggingController) tagEc2Instances(node *v1.Node) bool { instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() @@ -157,7 +156,7 @@ func (tc *TaggingController) tagEc2Instances(node *v1.Node) bool { return true } -// untagNodeResources untag node resources from a list of node +// untagNodeResources untag node resources from a list of nodes // If we want to untag more resources, modify this function appropriately func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { for _, node := range nodes { diff --git a/pkg/providers/v1/tags.go b/pkg/providers/v1/tags.go index 668275e73e..bc66436c12 100644 --- a/pkg/providers/v1/tags.go +++ b/pkg/providers/v1/tags.go @@ -317,10 +317,8 @@ func (c *Cloud) TagResource(resourceId string, tags map[string]string) error { Tags: buildAwsTags(tags), } - res, err := c.ec2.CreateTags(request) + _, err := c.ec2.CreateTags(request) - klog.Infof("NGUYEN: %v", res) - if err != nil { klog.Errorf("Error occurred trying to tag resources, %v", err) return err From 8ae20fff4b782bbcc355c820148c13ac17bd22c0 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Wed, 23 Mar 2022 11:44:44 -0700 Subject: [PATCH 20/40] Add failure test cases for when EC2 return error --- .../tagging/tagging_controller_test.go | 110 +++++++++++++++--- pkg/providers/v1/aws_fakes.go | 11 ++ 2 files changed, 106 insertions(+), 15 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index f5f771f0b7..bf5c9a07a2 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -31,11 +31,11 @@ const TestClusterID = "clusterid.test" func Test_NodesJoiningAndLeaving(t *testing.T) { testcases := []struct { - name string - currNode *v1.Node - taggingController TaggingController - noOfCurrNodes int - totalNodes int + name string + currNode *v1.Node + taggingController TaggingController + noOfToBeTaggedNodes int + totalNodes int }{ { name: "node0 joins the cluster.", @@ -52,8 +52,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { currNodes: make(map[string]bool), totalNodes: make(map[string]*v1.Node), }, - noOfCurrNodes: 1, - totalNodes: 1, + noOfToBeTaggedNodes: 1, + totalNodes: 1, }, { name: "node1 joins the cluster, node0 left.", @@ -82,8 +82,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, }, - noOfCurrNodes: 1, - totalNodes: 2, + noOfToBeTaggedNodes: 1, + totalNodes: 2, }, { name: "node2 joins the cluster, node0 and node1 left.", @@ -122,8 +122,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, }, - noOfCurrNodes: 1, - totalNodes: 3, + noOfToBeTaggedNodes: 1, + totalNodes: 3, }, { name: "no new node joins the cluster.", @@ -172,8 +172,88 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, }, - noOfCurrNodes: 1, - totalNodes: 3, + noOfToBeTaggedNodes: 1, + totalNodes: 3, + }, + { + name: "node 3 joins the cluster but failed to be tagged.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node3", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-error", + }, + }, + taggingController: TaggingController{ + currNodes: map[string]bool{ + "node0": true, + "node1": true, + "node2": true, + }, + totalNodes: map[string]*v1.Node{ + "node0": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: 
metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-00000", + }, + }, + "node1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-00001", + }, + }, + "node2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-00002", + }, + }, + }, + }, + noOfToBeTaggedNodes: 0, + totalNodes: 4, + }, + { + name: "node 1 joins the cluster, node 0 left but failed to be untagged.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-0001", + }, + }, + taggingController: TaggingController{ + currNodes: map[string]bool{ + "node0": true, + }, + totalNodes: map[string]*v1.Node{ + "node0": { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-error", + }, + }, + }, + }, + noOfToBeTaggedNodes: 2, + totalNodes: 2, }, } @@ -203,8 +283,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { testcase.taggingController.MonitorNodes(ctx) - if len(testcase.taggingController.currNodes) != testcase.noOfCurrNodes || len(testcase.taggingController.totalNodes) != testcase.totalNodes { - t.Errorf("currNodes must contain %d element(s), and totalNodes must contain %d element(s).", testcase.noOfCurrNodes, testcase.totalNodes) + if len(testcase.taggingController.currNodes) != testcase.noOfToBeTaggedNodes || len(testcase.taggingController.totalNodes) != testcase.totalNodes { + t.Errorf("currNodes must contain %d element(s), and totalNodes must contain %d element(s).", testcase.noOfToBeTaggedNodes, testcase.totalNodes) } }) } diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go index 024d1f94e7..ef3969f8c4 100644 --- a/pkg/providers/v1/aws_fakes.go +++ b/pkg/providers/v1/aws_fakes.go @@ -17,6 +17,7 @@ limitations under the License. 
package aws import ( + "errors" "fmt" "sort" "strings" @@ -264,11 +265,21 @@ func (ec2i *FakeEC2Impl) RemoveSubnets() { // CreateTags is not implemented but is required for interface conformance func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { + for _, id := range input.Resources { + if *id == "i-error" { + return nil, errors.New("Unable to tag") + } + } return &ec2.CreateTagsOutput{}, nil } // DeleteTags is not implemented but is required for interface conformance func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { + for _, id := range input.Resources { + if *id == "i-error" { + return nil, errors.New("Unable to remove tag") + } + } return &ec2.DeleteTagsOutput{}, nil } From a8322bb2b3cb635092228cbbe7ffe7b670ae2fdf Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Thu, 24 Mar 2022 13:37:52 -0700 Subject: [PATCH 21/40] adding details for --resources * get user input for resources * Add better testing for failures to add flags * fix a small issue with --resources * finalize the --resources --- docs/tagging_controller.md | 7 ++++ pkg/controllers/options/resources.go | 9 ++++ pkg/controllers/options/tagging_controller.go | 27 +++++++++++- pkg/controllers/tagging/tagging_controller.go | 42 ++++++++++++------- .../tagging/tagging_controller_test.go | 19 +++++---- .../tagging/tagging_controller_wrapper.go | 5 ++- pkg/providers/v1/aws.go | 2 +- 7 files changed, 84 insertions(+), 27 deletions(-) create mode 100644 docs/tagging_controller.md create mode 100644 pkg/controllers/options/resources.go diff --git a/docs/tagging_controller.md b/docs/tagging_controller.md new file mode 100644 index 0000000000..86317624c2 --- /dev/null +++ b/docs/tagging_controller.md @@ -0,0 +1,7 @@ +# The Tagging Controller + +The tagging controller is responsible for tagging and untagging node resources when it joins and leaves the cluster respectively. It can add and remove tags based on user input. Unlike the existing controllers, the tagging controller is working exclusively with AWS as we want to tag the resources (EC instances for example). For functionalities used by the controller, we primarily use `CreateTags` and `DeleteTags` from `EC2`. + +| Flag | Valid Values | Default | Description | +|------| --- | --- | --- | +| tags | Comma-separated list of key=value | - | A comma-separated list of key-value pairs which will be recorded as nodes' additional tags. For example: "Key1=Val1,Key2=Val2,KeyNoVal1=,KeyNoVal2" | \ No newline at end of file diff --git a/pkg/controllers/options/resources.go b/pkg/controllers/options/resources.go new file mode 100644 index 0000000000..19cdd89ff6 --- /dev/null +++ b/pkg/controllers/options/resources.go @@ -0,0 +1,9 @@ +package options + +const ( + Instance string = "instance" +) + +var SupportedResources = map[string]string{ + "instance": Instance, +} diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go index be217696db..21244b9bc7 100644 --- a/pkg/controllers/options/tagging_controller.go +++ b/pkg/controllers/options/tagging_controller.go @@ -1,3 +1,16 @@ +/* +Copyright 2016 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package options import ( @@ -6,11 +19,13 @@ import ( ) type TaggingControllerOptions struct { - Tags map[string]string + Tags map[string]string + Resources []string } func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) { fs.StringToStringVar(&o.Tags, "tags", o.Tags, "Tags to apply to AWS resources in the tagging controller.") + fs.StringArrayVar(&o.Resources, "resources", o.Resources, "AWS resources name to add/remove tags in the tagging controller.") } func (o *TaggingControllerOptions) Validate() error { @@ -18,5 +33,15 @@ func (o *TaggingControllerOptions) Validate() error { return fmt.Errorf("--tags must not be empty") } + if len(o.Resources) == 0 { + return fmt.Errorf("--resources must not be empty") + } + + for _, r := range o.Resources { + if _, ok := SupportedResources[r]; !ok { + return fmt.Errorf("%s is not a supported resource", r) + } + } + return nil } diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 0f28524ca5..4ebbe9d10d 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -25,6 +25,7 @@ import ( v1lister "k8s.io/client-go/listers/core/v1" cloudprovider "k8s.io/cloud-provider" "k8s.io/cloud-provider-aws/pkg/controllers/options" + opt "k8s.io/cloud-provider-aws/pkg/controllers/options" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" "time" @@ -45,7 +46,7 @@ type TaggingController struct { nodeMonitorPeriod time.Duration // A map presenting the node and whether it currently exists - currNodes map[string]bool + currentNodes map[string]bool // A map representing nodes that were ever part of the cluster totalNodes map[string]*v1.Node @@ -63,7 +64,8 @@ func NewTaggingController( kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeMonitorPeriod time.Duration, - tags map[string]string) (*TaggingController, error) { + tags map[string]string, + resources []string) (*TaggingController, error) { awsCloud, ok := cloud.(*awsv1.Cloud) if !ok { @@ -76,9 +78,10 @@ func NewTaggingController( nodeLister: nodeInformer.Lister(), cloud: awsCloud, nodeMonitorPeriod: nodeMonitorPeriod, - currNodes: make(map[string]bool), + currentNodes: make(map[string]bool), totalNodes: make(map[string]*v1.Node), tags: tags, + resources: resources, } return tc, nil } @@ -97,24 +100,24 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { klog.Fatalf("error listing nodes: %s", err) } - for k := range tc.currNodes { - tc.currNodes[k] = false + for k := range tc.currentNodes { + tc.currentNodes[k] = false } var nodesToTag []*v1.Node for _, node := range nodes { - if _, ok := tc.currNodes[node.GetName()]; !ok { + if _, ok := tc.currentNodes[node.GetName()]; !ok { nodesToTag = append(nodesToTag, node) } tc.totalNodes[node.GetName()] = node - tc.currNodes[node.GetName()] = true + tc.currentNodes[node.GetName()] = true } tc.tagNodesResources(nodesToTag) var nodesToUntag []*v1.Node - for nodeName, existed := range tc.currNodes { - if existed == false { + for nodeName, existed := range tc.currentNodes { + if !existed { nodesToUntag = 
append(nodesToUntag, tc.totalNodes[nodeName]) } } @@ -126,12 +129,18 @@ func (tc *TaggingController) MonitorNodes(ctx context.Context) { func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { for _, node := range nodes { nodeTagged := false - nodeTagged = tc.tagEc2Instances(node) + + for _, resource := range tc.resources { + switch resource { + case opt.Instance: + nodeTagged = tc.tagEc2Instances(node) + } + } if !nodeTagged { - // Node tagged unsuccessfully, remove from currNodes + // Node tagged unsuccessfully, remove from currentNodes // so that we can try later if it still exists - delete(tc.currNodes, node.GetName()) + delete(tc.currentNodes, node.GetName()) } } } @@ -161,10 +170,15 @@ func (tc *TaggingController) tagEc2Instances(node *v1.Node) bool { func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { for _, node := range nodes { nodeUntagged := false - nodeUntagged = tc.untagEc2Instance(node) + + for _, resource := range tc.resources { + if resource == opt.Instance { + nodeUntagged = tc.untagEc2Instance(node) + } + } if nodeUntagged { - delete(tc.currNodes, node.GetName()) + delete(tc.currentNodes, node.GetName()) } } } diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index bf5c9a07a2..5a7fe3ef58 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -49,8 +49,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - currNodes: make(map[string]bool), - totalNodes: make(map[string]*v1.Node), + currentNodes: make(map[string]bool), + totalNodes: make(map[string]*v1.Node), }, noOfToBeTaggedNodes: 1, totalNodes: 1, @@ -67,7 +67,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - currNodes: map[string]bool{ + currentNodes: map[string]bool{ "node0": true, }, totalNodes: map[string]*v1.Node{ @@ -97,7 +97,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - currNodes: map[string]bool{ + currentNodes: map[string]bool{ "node0": true, "node1": true, }, @@ -137,7 +137,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - currNodes: map[string]bool{ + currentNodes: map[string]bool{ "node0": true, "node1": true, "node2": true, @@ -187,7 +187,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - currNodes: map[string]bool{ + currentNodes: map[string]bool{ "node0": true, "node1": true, "node2": true, @@ -237,7 +237,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, taggingController: TaggingController{ - currNodes: map[string]bool{ + currentNodes: map[string]bool{ "node0": true, }, totalNodes: map[string]*v1.Node{ @@ -283,9 +283,10 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { testcase.taggingController.MonitorNodes(ctx) - if len(testcase.taggingController.currNodes) != testcase.noOfToBeTaggedNodes || len(testcase.taggingController.totalNodes) != testcase.totalNodes { - t.Errorf("currNodes must contain %d element(s), and totalNodes must contain %d element(s).", testcase.noOfToBeTaggedNodes, testcase.totalNodes) + if len(testcase.taggingController.currentNodes) != testcase.noOfToBeTaggedNodes || len(testcase.taggingController.totalNodes) != testcase.totalNodes { + t.Errorf("currentNodes must contain %d element(s), and totalNodes must contain %d element(s).", testcase.noOfToBeTaggedNodes, 
testcase.totalNodes) } + }) } } diff --git a/pkg/controllers/tagging/tagging_controller_wrapper.go b/pkg/controllers/tagging/tagging_controller_wrapper.go index c852f333af..1c402d9f35 100644 --- a/pkg/controllers/tagging/tagging_controller_wrapper.go +++ b/pkg/controllers/tagging/tagging_controller_wrapper.go @@ -32,7 +32,7 @@ func (tc *TaggingControllerWrapper) StartTaggingControllerWrapper(initContext ap func (tc *TaggingControllerWrapper) startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) { err := tc.Options.Validate() if err != nil { - klog.Fatal("Tagging controller inputs are not properly set.") + klog.Fatalf("Tagging controller inputs are not properly set: %v", err) } // Start the TaggingController @@ -41,7 +41,8 @@ func (tc *TaggingControllerWrapper) startTaggingController(ctx context.Context, completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName), cloud, completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration, - tc.Options.Tags) + tc.Options.Tags, + tc.Options.Resources) if err != nil { klog.Warningf("failed to start tagging controller: %s", err) diff --git a/pkg/providers/v1/aws.go b/pkg/providers/v1/aws.go index 280b678c79..b6f6ede506 100644 --- a/pkg/providers/v1/aws.go +++ b/pkg/providers/v1/aws.go @@ -1162,7 +1162,7 @@ func (s *awsSdkEC2) DeleteTags(request *ec2.DeleteTagsInput) (*ec2.DeleteTagsOut requestTime := time.Now() resp, err := s.ec2.DeleteTags(request) timeTaken := time.Since(requestTime).Seconds() - recordAWSMetric("create_tags", timeTaken, err) + recordAWSMetric("delete_tags", timeTaken, err) return resp, err } From 51de80b8e54dba880ed0c6a66b93840ba57f2dbb Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Thu, 24 Mar 2022 13:40:26 -0700 Subject: [PATCH 22/40] add in Copyright message --- pkg/controllers/options/resources.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/controllers/options/resources.go b/pkg/controllers/options/resources.go index 19cdd89ff6..3e63ae3e7d 100644 --- a/pkg/controllers/options/resources.go +++ b/pkg/controllers/options/resources.go @@ -1,3 +1,16 @@ +/* +Copyright 2016 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package options const ( From 97cca844474765568bd9403b9ab870bdae4dd452 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Thu, 24 Mar 2022 20:39:23 -0700 Subject: [PATCH 23/40] Using NodeInformer and Workqueue for tagging resources --- pkg/controllers/options/resources.go | 1 - pkg/controllers/tagging/tagging_controller.go | 156 +++++++++++------- .../tagging/tagging_controller_test.go | 1 + .../tagging/tagging_controller_wrapper.go | 2 +- 4 files changed, 98 insertions(+), 62 deletions(-) diff --git a/pkg/controllers/options/resources.go b/pkg/controllers/options/resources.go index 3e63ae3e7d..381fcde6f0 100644 --- a/pkg/controllers/options/resources.go +++ b/pkg/controllers/options/resources.go @@ -10,7 +10,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - package options const ( diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 4ebbe9d10d..7170f31862 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -14,15 +14,16 @@ limitations under the License. package tagging import ( - "context" "fmt" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" + apierrors "k8s.io/apimachinery/pkg/api/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" coreinformers "k8s.io/client-go/informers/core/v1" clientset "k8s.io/client-go/kubernetes" v1lister "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" cloudprovider "k8s.io/cloud-provider" "k8s.io/cloud-provider-aws/pkg/controllers/options" opt "k8s.io/cloud-provider-aws/pkg/controllers/options" @@ -35,10 +36,12 @@ import ( // It periodically check for Node events (creating/deleting) to apply appropriate // tags to resources. type TaggingController struct { + nodeInformer coreinformers.NodeInformer controllerOptions options.TaggingControllerOptions kubeClient clientset.Interface nodeLister v1lister.NodeLister cloud *awsv1.Cloud + workqueue workqueue.RateLimitingInterface // Value controlling TaggingController monitoring period, i.e. how often does TaggingController // check node list. This value should be lower than nodeMonitorGracePeriod @@ -74,6 +77,7 @@ func NewTaggingController( } tc := &TaggingController{ + nodeInformer: nodeInformer, kubeClient: kubeClient, nodeLister: nodeInformer.Lister(), cloud: awsCloud, @@ -82,123 +86,155 @@ func NewTaggingController( totalNodes: make(map[string]*v1.Node), tags: tags, resources: resources, + workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"), } + + // Use shared informer to listen to add/update/delete of nodes. Note that any nodes + // that exist before tagging controller starts will show up in the update method + tc.nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: tc.enqueueNode, + UpdateFunc: func(oldObj, newObj interface{}) { tc.enqueueNode(newObj) }, + // TODO: maybe use workqueue for this to be more resilient + DeleteFunc: tc.untagNodeResources, + }) + return tc, nil } // Run will start the controller to tag resources attached to the cluster // and untag resources detached from the cluster. 
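The constructor now receives a shared node informer and registers its event handlers on it. A rough sketch of how a caller could stand the controller up end to end; this wiring is hypothetical and not part of the patch (the real binary reaches NewTaggingController through cloud-controller-manager's app framework and its shared informer factory):

package example

import (
	"time"

	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"
	cloudprovider "k8s.io/cloud-provider"
	"k8s.io/cloud-provider-aws/pkg/controllers/tagging"
	"k8s.io/klog/v2"
)

// runTagging is hypothetical glue code showing the lifecycle: build the
// informer, construct the controller, start and sync the caches, then run.
func runTagging(kubeClient clientset.Interface, cloud cloudprovider.Interface, stopCh <-chan struct{}) {
	factory := informers.NewSharedInformerFactory(kubeClient, 30*time.Second)
	nodeInformer := factory.Core().V1().Nodes()

	tc, err := tagging.NewTaggingController(
		nodeInformer,
		kubeClient,
		cloud,
		5*time.Minute,                      // nodeMonitorPeriod
		map[string]string{"team": "infra"}, // the parsed --tags input
		[]string{"instance"},               // the parsed --resources input
	)
	if err != nil {
		klog.Fatalf("failed to build tagging controller: %v", err)
	}

	factory.Start(stopCh)            // start the shared informers
	factory.WaitForCacheSync(stopCh) // fill the node cache before processing
	tc.Run(stopCh)                   // blocks until stopCh is closed
}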
-func (tc *TaggingController) Run(ctx context.Context) { +func (tc *TaggingController) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() + defer tc.workqueue.ShutDown() + + klog.Infof("Starting the tagging controller") + go wait.Until(tc.MonitorNodes, tc.nodeMonitorPeriod, stopCh) - wait.UntilWithContext(ctx, tc.MonitorNodes, tc.nodeMonitorPeriod) + <-stopCh } -func (tc *TaggingController) MonitorNodes(ctx context.Context) { - nodes, err := tc.nodeLister.List(labels.Everything()) - if err != nil { - klog.Fatalf("error listing nodes: %s", err) +// MonitorNodes is a long-running function that continuously +// read and process a message on the work queue +func (tc *TaggingController) MonitorNodes() { + obj, shutdown := tc.workqueue.Get() + if shutdown { + return } - for k := range tc.currentNodes { - tc.currentNodes[k] = false - } + err := func(obj interface{}) error { + defer tc.workqueue.Done(obj) - var nodesToTag []*v1.Node - for _, node := range nodes { - if _, ok := tc.currentNodes[node.GetName()]; !ok { - nodesToTag = append(nodesToTag, node) + var key string + var ok bool + if key, ok = obj.(string); !ok { + tc.workqueue.Forget(obj) + utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj)) + return nil } - tc.totalNodes[node.GetName()] = node - tc.currentNodes[node.GetName()] = true - } - tc.tagNodesResources(nodesToTag) + _, nodeName, err := cache.SplitMetaNamespaceKey(key) - var nodesToUntag []*v1.Node - for nodeName, existed := range tc.currentNodes { - if !existed { - nodesToUntag = append(nodesToUntag, tc.totalNodes[nodeName]) + if err != nil { + utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key)) + return nil + } + + if err := tc.tagNodesResources(nodeName); err != nil { + // Put the item back on the workqueue to handle any transient errors. + tc.workqueue.AddRateLimited(key) + return fmt.Errorf("error tagging '%s': %s, requeuing", key, err.Error()) } + + tc.workqueue.Forget(obj) + return nil + }(obj) + + if err != nil { + utilruntime.HandleError(err) } - tc.untagNodeResources(nodesToUntag) } // tagNodesResources tag node resources from a list of nodes // If we want to tag more resources, modify this function appropriately -func (tc *TaggingController) tagNodesResources(nodes []*v1.Node) { - for _, node := range nodes { - nodeTagged := false - - for _, resource := range tc.resources { - switch resource { - case opt.Instance: - nodeTagged = tc.tagEc2Instances(node) - } +func (tc *TaggingController) tagNodesResources(nodeName string) error { + node, err := tc.nodeInformer.Lister().Get(nodeName) + if err != nil { + if apierrors.IsNotFound(err) { + return nil } - if !nodeTagged { - // Node tagged unsuccessfully, remove from currentNodes - // so that we can try later if it still exists - delete(tc.currentNodes, node.GetName()) + return err + } + + for _, resource := range tc.resources { + switch resource { + case opt.Instance: + err = tc.tagEc2Instances(node) } } + + return err } // tagEc2Instances applies the provided tags to each EC2 instance in -// the cluster. Return a boolean value representing if a node is tagged or not -func (tc *TaggingController) tagEc2Instances(node *v1.Node) bool { +// the cluster. 
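MonitorNodes above moves nodes through the workqueue as string keys produced by MetaNamespaceKeyFunc. Since Nodes are cluster-scoped, the key is simply the node name, and SplitMetaNamespaceKey hands back an empty namespace. A quick standalone illustration, using only the client-go helpers already imported by the patch:

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

// demoKeys shows what the workqueue actually carries for a cluster-scoped Node.
func demoKeys() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node0"}}

	key, _ := cache.MetaNamespaceKeyFunc(node)
	fmt.Println(key) // "node0"; cluster-scoped objects get no namespace prefix

	ns, name, _ := cache.SplitMetaNamespaceKey(key)
	fmt.Printf("ns=%q name=%q\n", ns, name) // ns="" name="node0"
}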
+func (tc *TaggingController) tagEc2Instances(node *v1.Node) error { instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() if err != nil { klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) - return false + return err } else { err := tc.cloud.TagResource(string(instanceId), tc.tags) if err != nil { klog.Errorf("Error in tagging EC2 instance for node %s, error: %v", node.GetName(), err) - return false + return err } } - return true + return nil } // untagNodeResources untag node resources from a list of nodes // If we want to untag more resources, modify this function appropriately -func (tc *TaggingController) untagNodeResources(nodes []*v1.Node) { - for _, node := range nodes { - nodeUntagged := false - - for _, resource := range tc.resources { - if resource == opt.Instance { - nodeUntagged = tc.untagEc2Instance(node) - } - } +func (tc *TaggingController) untagNodeResources(obj interface{}) { + var node *v1.Node + var ok bool + if node, ok = obj.(*v1.Node); !ok { + utilruntime.HandleError(fmt.Errorf("unable to get Node object from %v", obj)) + } - if nodeUntagged { - delete(tc.currentNodes, node.GetName()) + for _, resource := range tc.resources { + switch resource { + case opt.Instance: + tc.untagEc2Instance(node) } } } // untagEc2Instances deletes the provided tags to each EC2 instances in -// the cluster. Return if a node is tagged or not -func (tc *TaggingController) untagEc2Instance(node *v1.Node) bool { +// the cluster. +func (tc *TaggingController) untagEc2Instance(node *v1.Node) { instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() if err != nil { - klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) - return false + klog.Fatalf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) } else { err := tc.cloud.UntagResource(string(instanceId), tc.tags) if err != nil { - klog.Errorf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) - return false + klog.Fatalf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) } } +} - return true +func (tc *TaggingController) enqueueNode(obj interface{}) { + var key string + var err error + if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { + utilruntime.HandleError(err) + return + } + tc.workqueue.Add(key) } diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 5a7fe3ef58..3e684e8bae 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -29,6 +29,7 @@ import ( const TestClusterID = "clusterid.test" +// TODO: rework the test func Test_NodesJoiningAndLeaving(t *testing.T) { testcases := []struct { name string diff --git a/pkg/controllers/tagging/tagging_controller_wrapper.go b/pkg/controllers/tagging/tagging_controller_wrapper.go index 1c402d9f35..f250ab78f0 100644 --- a/pkg/controllers/tagging/tagging_controller_wrapper.go +++ b/pkg/controllers/tagging/tagging_controller_wrapper.go @@ -49,7 +49,7 @@ func (tc *TaggingControllerWrapper) startTaggingController(ctx context.Context, return nil, false, nil } - go taggingcontroller.Run(ctx) + go taggingcontroller.Run(ctx.Done()) return nil, true, nil } From c1eccd06112da6215813d658ed5949b443a678b1 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Fri, 25 Mar 2022 10:44:42 -0700 Subject: [PATCH 
24/40] Used workqueue for both tag and untag actions --- pkg/controllers/tagging/tagging_controller.go | 105 ++++++++++++------ 1 file changed, 71 insertions(+), 34 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 7170f31862..f7c31585df 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -29,9 +29,14 @@ import ( opt "k8s.io/cloud-provider-aws/pkg/controllers/options" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" + "strings" "time" ) +const ( + tag string = "tag" +) + // TaggingController is the controller implementation for tagging cluster resources. // It periodically check for Node events (creating/deleting) to apply appropriate // tags to resources. @@ -92,10 +97,9 @@ func NewTaggingController( // Use shared informer to listen to add/update/delete of nodes. Note that any nodes // that exist before tagging controller starts will show up in the update method tc.nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: tc.enqueueNode, - UpdateFunc: func(oldObj, newObj interface{}) { tc.enqueueNode(newObj) }, - // TODO: maybe use workqueue for this to be more resilient - DeleteFunc: tc.untagNodeResources, + AddFunc: func(obj interface{}) { tc.enqueueNode(obj, true) }, + UpdateFunc: func(oldObj, newObj interface{}) { tc.enqueueNode(newObj, true) }, + DeleteFunc: func(obj interface{}) { tc.enqueueNode(obj, false) }, }) return tc, nil @@ -132,6 +136,9 @@ func (tc *TaggingController) MonitorNodes() { return nil } + var isTagged bool + isTagged, key = tc.getActionAndKey(key) + _, nodeName, err := cache.SplitMetaNamespaceKey(key) if err != nil { @@ -139,10 +146,26 @@ func (tc *TaggingController) MonitorNodes() { return nil } - if err := tc.tagNodesResources(nodeName); err != nil { - // Put the item back on the workqueue to handle any transient errors. - tc.workqueue.AddRateLimited(key) - return fmt.Errorf("error tagging '%s': %s, requeuing", key, err.Error()) + node, err := tc.nodeInformer.Lister().Get(nodeName) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } + + return err + } + + if isTagged { + if err := tc.tagNodesResources(node); err != nil { + // Put the item back on the workqueue to handle any transient errors. + tc.workqueue.AddRateLimited(tag + key) + return fmt.Errorf("error tagging '%s': %s, requeuing", key, err.Error()) + } + } else { + if err := tc.untagNodeResources(node); err != nil { + tc.workqueue.AddRateLimited(key) + return fmt.Errorf("error untagging '%s': %s, requeuing", key, err.Error()) + } } tc.workqueue.Forget(obj) @@ -156,29 +179,23 @@ func (tc *TaggingController) MonitorNodes() { // tagNodesResources tag node resources from a list of nodes // If we want to tag more resources, modify this function appropriately -func (tc *TaggingController) tagNodesResources(nodeName string) error { - node, err := tc.nodeInformer.Lister().Get(nodeName) - if err != nil { - if apierrors.IsNotFound(err) { - return nil - } - - return err - } - +func (tc *TaggingController) tagNodesResources(node *v1.Node) error { for _, resource := range tc.resources { switch resource { case opt.Instance: - err = tc.tagEc2Instances(node) + err := tc.tagEc2Instance(node) + if err != nil { + return err + } } } - return err + return nil } // tagEc2Instances applies the provided tags to each EC2 instance in // the cluster. 
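Because the queue carries plain strings, this patch smuggles the intended action into the key itself: tag requests are enqueued with the prefix prepended, untag requests go in bare, and getActionAndKey recovers the action by prefix. A standalone round-trip sketch of that encoding, with the constant inlined; it also shows the collision a short prefix invites, which a later rename to a longer prefix narrows:

package example

import (
	"fmt"
	"strings"
)

// tagPrefix stands in for the `tag` constant above.
const tagPrefix = "tag"

// encode marks a workqueue key as a tag request; untag keys go in bare.
func encode(key string, toTag bool) string {
	if toTag {
		return tagPrefix + key
	}
	return key
}

// decode recovers the action and the original key.
func decode(key string) (toTag bool, name string) {
	if strings.HasPrefix(key, tagPrefix) {
		return true, strings.TrimPrefix(key, tagPrefix)
	}
	return false, key
}

func demoPrefix() {
	fmt.Println(decode(encode("node0", true)))  // true node0
	fmt.Println(decode(encode("node0", false))) // false node0

	// Caveat: an untag key for a node literally named "tagger-1" decodes
	// as a tag request for "ger-1"; a longer, unlikelier prefix reduces
	// the chance of such a collision.
	fmt.Println(decode("tagger-1")) // true ger-1
}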
-func (tc *TaggingController) tagEc2Instances(node *v1.Node) error { +func (tc *TaggingController) tagEc2Instance(node *v1.Node) error { instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() if err != nil { @@ -198,43 +215,63 @@ func (tc *TaggingController) tagEc2Instances(node *v1.Node) error { // untagNodeResources untag node resources from a list of nodes // If we want to untag more resources, modify this function appropriately -func (tc *TaggingController) untagNodeResources(obj interface{}) { - var node *v1.Node - var ok bool - if node, ok = obj.(*v1.Node); !ok { - utilruntime.HandleError(fmt.Errorf("unable to get Node object from %v", obj)) - } - +func (tc *TaggingController) untagNodeResources(node *v1.Node) error { for _, resource := range tc.resources { switch resource { case opt.Instance: - tc.untagEc2Instance(node) + err := tc.untagEc2Instance(node) + if err != nil { + return err + } } } + + return nil } // untagEc2Instances deletes the provided tags to each EC2 instances in // the cluster. -func (tc *TaggingController) untagEc2Instance(node *v1.Node) { +func (tc *TaggingController) untagEc2Instance(node *v1.Node) error { instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() if err != nil { - klog.Fatalf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) + klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) + return err } else { err := tc.cloud.UntagResource(string(instanceId), tc.tags) if err != nil { - klog.Fatalf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) + klog.Errorf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) + return err } } + + return nil } -func (tc *TaggingController) enqueueNode(obj interface{}) { +// enqueueNode takes in the object to enqueue to the workqueue and whether +// the object is to be tagged +func (tc *TaggingController) enqueueNode(obj interface{}, isTagged bool) { var key string var err error if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { utilruntime.HandleError(err) return } - tc.workqueue.Add(key) + + if isTagged { + tc.workqueue.Add(tag + key) + } else { + tc.workqueue.Add(key) + } +} + +func (tc *TaggingController) getActionAndKey(key string) (bool, string) { + isTagged := false + if strings.HasPrefix(key, tag) { + isTagged = true + key = strings.TrimPrefix(key, tag) + } + + return isTagged, key } From 627b1cd84337318f1b29619b606dee4bf4064c21 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Fri, 25 Mar 2022 14:16:26 -0700 Subject: [PATCH 25/40] Update docs/tagging_controller.md Co-authored-by: Nicholas Turner <1205393+nckturner@users.noreply.github.com> --- docs/tagging_controller.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tagging_controller.md b/docs/tagging_controller.md index 86317624c2..74161ab969 100644 --- a/docs/tagging_controller.md +++ b/docs/tagging_controller.md @@ -1,6 +1,6 @@ # The Tagging Controller -The tagging controller is responsible for tagging and untagging node resources when it joins and leaves the cluster respectively. It can add and remove tags based on user input. Unlike the existing controllers, the tagging controller is working exclusively with AWS as we want to tag the resources (EC instances for example). For functionalities used by the controller, we primarily use `CreateTags` and `DeleteTags` from `EC2`. 
+The tagging controller is responsible for tagging and untagging node resources when they join and leave the cluster, respectively. It can add and remove tags based on user input. Unlike the existing controllers, the tagging controller works exclusively with AWS. The AWS APIs it uses are `ec2:CreateTags` and `ec2:DeleteTags`.
 
 | Flag | Valid Values | Default | Description |
 |------| --- | --- | --- |

From aeebb9f939f382fc563960d58d3ab3a7165312d1 Mon Sep 17 00:00:00 2001
From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com>
Date: Fri, 25 Mar 2022 18:03:20 -0700
Subject: [PATCH 26/40] Renamed fields in the tagging controller to be more
 user friendly

---
 pkg/controllers/options/tagging_controller.go |   8 +-
 pkg/controllers/tagging/tagging_controller.go |  51 ++--
 .../tagging/tagging_controller_test.go        | 244 ++----------------
 pkg/providers/v1/aws_fakes.go                 |  28 +-
 4 files changed, 74 insertions(+), 257 deletions(-)

diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go
index 21244b9bc7..52cc6593aa 100644
--- a/pkg/controllers/options/tagging_controller.go
+++ b/pkg/controllers/options/tagging_controller.go
@@ -30,7 +30,7 @@ func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) {
 
 func (o *TaggingControllerOptions) Validate() error {
 	if len(o.Tags) == 0 {
-		return fmt.Errorf("--tags must not be empty")
+		return fmt.Errorf("--tags must not be empty and must be in the form key=value")
 	}
 
 	if len(o.Resources) == 0 {
@@ -39,7 +39,11 @@ func (o *TaggingControllerOptions) Validate() error {
 
 	for _, r := range o.Resources {
 		if _, ok := SupportedResources[r]; !ok {
-			return fmt.Errorf("%s is not a supported resource", r)
+			resources := []string{}
+			for r := range SupportedResources {
+				resources = append(resources, r)
+			}
+			return fmt.Errorf("%s is not a supported resource; currently supported resources: %v", r, resources)
 		}
 	}
 
diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go
index f7c31585df..46d66fa33b 100644
--- a/pkg/controllers/tagging/tagging_controller.go
+++ b/pkg/controllers/tagging/tagging_controller.go
@@ -21,11 +21,9 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
-	v1lister "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/util/workqueue"
 	cloudprovider "k8s.io/cloud-provider"
-	"k8s.io/cloud-provider-aws/pkg/controllers/options"
 	opt "k8s.io/cloud-provider-aws/pkg/controllers/options"
 	awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1"
 	"k8s.io/klog/v2"
@@ -34,31 +32,25 @@ import (
 )
 
 const (
-	tag string = "tag"
+	// This is a prefix used to recognize if a node in the workqueue
+	// is to be tagged or not
+	tagKeyPrefix string = "tagKeyPrefix"
 )
 
 // TaggingController is the controller implementation for tagging cluster resources.
 // It periodically check for Node events (creating/deleting) to apply appropriate
 // tags to resources.
 type TaggingController struct {
-	nodeInformer      coreinformers.NodeInformer
-	controllerOptions options.TaggingControllerOptions
-	kubeClient        clientset.Interface
-	nodeLister        v1lister.NodeLister
-	cloud             *awsv1.Cloud
-	workqueue         workqueue.RateLimitingInterface
+	nodeInformer coreinformers.NodeInformer
+	kubeClient   clientset.Interface
+	cloud        *awsv1.Cloud
+	workqueue    workqueue.RateLimitingInterface
 
 	// Value controlling TaggingController monitoring period, i.e. how often does TaggingController
 	// check node list. 
This value should be lower than nodeMonitorGracePeriod // set in controller-manager nodeMonitorPeriod time.Duration - // A map presenting the node and whether it currently exists - currentNodes map[string]bool - - // A map representing nodes that were ever part of the cluster - totalNodes map[string]*v1.Node - // Representing the user input for tags tags map[string]string @@ -84,11 +76,8 @@ func NewTaggingController( tc := &TaggingController{ nodeInformer: nodeInformer, kubeClient: kubeClient, - nodeLister: nodeInformer.Lister(), cloud: awsCloud, nodeMonitorPeriod: nodeMonitorPeriod, - currentNodes: make(map[string]bool), - totalNodes: make(map[string]*v1.Node), tags: tags, resources: resources, workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"), @@ -136,8 +125,8 @@ func (tc *TaggingController) MonitorNodes() { return nil } - var isTagged bool - isTagged, key = tc.getActionAndKey(key) + var toBeTagged bool + toBeTagged, key = tc.getActionAndKey(key) _, nodeName, err := cache.SplitMetaNamespaceKey(key) @@ -155,10 +144,10 @@ func (tc *TaggingController) MonitorNodes() { return err } - if isTagged { + if toBeTagged { if err := tc.tagNodesResources(node); err != nil { // Put the item back on the workqueue to handle any transient errors. - tc.workqueue.AddRateLimited(tag + key) + tc.workqueue.AddRateLimited(tagKeyPrefix + key) return fmt.Errorf("error tagging '%s': %s, requeuing", key, err.Error()) } } else { @@ -251,7 +240,7 @@ func (tc *TaggingController) untagEc2Instance(node *v1.Node) error { // enqueueNode takes in the object to enqueue to the workqueue and whether // the object is to be tagged -func (tc *TaggingController) enqueueNode(obj interface{}, isTagged bool) { +func (tc *TaggingController) enqueueNode(obj interface{}, toBeTagged bool) { var key string var err error if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { @@ -259,19 +248,21 @@ func (tc *TaggingController) enqueueNode(obj interface{}, isTagged bool) { return } - if isTagged { - tc.workqueue.Add(tag + key) + if toBeTagged { + tc.workqueue.Add(tagKeyPrefix + key) } else { tc.workqueue.Add(key) } } +// getActionAndKey from the provided key, check if the object is to be tagged +// and extract that action together with the key func (tc *TaggingController) getActionAndKey(key string) (bool, string) { - isTagged := false - if strings.HasPrefix(key, tag) { - isTagged = true - key = strings.TrimPrefix(key, tag) + toBeTagged := false + if strings.HasPrefix(key, tagKeyPrefix) { + toBeTagged = true + key = strings.TrimPrefix(key, tagKeyPrefix) } - return isTagged, key + return toBeTagged, key } diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 3e684e8bae..ceb12aa6d9 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -15,12 +15,14 @@ package tagging import ( "context" + "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" "testing" @@ -29,14 +31,11 @@ import ( const TestClusterID = "clusterid.test" -// TODO: rework the test func Test_NodesJoiningAndLeaving(t *testing.T) { testcases := []struct { - name string - currNode *v1.Node - 
taggingController TaggingController - noOfToBeTaggedNodes int - totalNodes int + name string + currNode *v1.Node + expectedCalls []string }{ { name: "node0 joins the cluster.", @@ -49,212 +48,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { ProviderID: "i-00000", }, }, - taggingController: TaggingController{ - currentNodes: make(map[string]bool), - totalNodes: make(map[string]*v1.Node), - }, - noOfToBeTaggedNodes: 1, - totalNodes: 1, - }, - { - name: "node1 joins the cluster, node0 left.", - currNode: &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00001", - }, - }, - taggingController: TaggingController{ - currentNodes: map[string]bool{ - "node0": true, - }, - totalNodes: map[string]*v1.Node{ - "node0": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node0", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00000", - }, - }, - }, - }, - noOfToBeTaggedNodes: 1, - totalNodes: 2, - }, - { - name: "node2 joins the cluster, node0 and node1 left.", - currNode: &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00002", - }, - }, - taggingController: TaggingController{ - currentNodes: map[string]bool{ - "node0": true, - "node1": true, - }, - totalNodes: map[string]*v1.Node{ - "node0": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node0", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00000", - }, - }, - "node1": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00001", - }, - }, - }, - }, - noOfToBeTaggedNodes: 1, - totalNodes: 3, - }, - { - name: "no new node joins the cluster.", - currNode: &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00002", - }, - }, - taggingController: TaggingController{ - currentNodes: map[string]bool{ - "node0": true, - "node1": true, - "node2": true, - }, - totalNodes: map[string]*v1.Node{ - "node0": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node0", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00000", - }, - }, - "node1": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00001", - }, - }, - "node2": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-00002", - }, - }, - }, - }, - noOfToBeTaggedNodes: 1, - totalNodes: 3, - }, - { - name: "node 3 joins the cluster but failed to be tagged.", - currNode: &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node3", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: v1.NodeSpec{ - ProviderID: "i-error", - }, - }, - taggingController: TaggingController{ - currentNodes: map[string]bool{ - "node0": true, - "node1": true, - "node2": true, - }, - totalNodes: map[string]*v1.Node{ - "node0": { - ObjectMeta: metav1.ObjectMeta{ - Name: "node0", - CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), - }, - Spec: 
v1.NodeSpec{
-						ProviderID: "i-00000",
-					},
-				},
-				"node1": {
-					ObjectMeta: metav1.ObjectMeta{
-						Name:              "node1",
-						CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-					},
-					Spec: v1.NodeSpec{
-						ProviderID: "i-00001",
-					},
-				},
-				"node2": {
-					ObjectMeta: metav1.ObjectMeta{
-						Name:              "node2",
-						CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-					},
-					Spec: v1.NodeSpec{
-						ProviderID: "i-00002",
-					},
-				},
-				},
-			},
-			noOfToBeTaggedNodes: 0,
-			totalNodes:          4,
-		},
-		{
-			name: "node 1 joins the cluster, node 0 left but failed to be untagged.",
-			currNode: &v1.Node{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:              "node1",
-					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-				},
-				Spec: v1.NodeSpec{
-					ProviderID: "i-0001",
-				},
-			},
-			taggingController: TaggingController{
-				currentNodes: map[string]bool{
-					"node0": true,
-				},
-				totalNodes: map[string]*v1.Node{
-					"node0": {
-						ObjectMeta: metav1.ObjectMeta{
-							Name:              "node0",
-							CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-						},
-						Spec: v1.NodeSpec{
-							ProviderID: "i-error",
-						},
-					},
-				},
-			},
-			noOfToBeTaggedNodes: 2,
-			totalNodes:          2,
+			expectedCalls: []string{"create-tags"},
 		},
 	}
@@ -263,8 +57,6 @@ func Test_NodesJoiningAndLeaving(t *testing.T) {
 
 	for _, testcase := range testcases {
 		t.Run(testcase.name, func(t *testing.T) {
-			ctx, cancel := context.WithCancel(context.Background())
-			defer cancel()
 			clientset := fake.NewSimpleClientset(testcase.currNode)
 			informer := informers.NewSharedInformerFactory(clientset, time.Second)
 			nodeInformer := informer.Core().V1().Nodes()
@@ -274,20 +66,26 @@ func Test_NodesJoiningAndLeaving(t *testing.T) {
 			}
 
 			eventBroadcaster := record.NewBroadcaster()
-			testcase.taggingController.nodeLister = nodeInformer.Lister()
-			testcase.taggingController.kubeClient = clientset
-			testcase.taggingController.cloud = fakeAws
-			testcase.taggingController.nodeMonitorPeriod = 1 * time.Second
+			tc := &TaggingController{
+				nodeInformer:      nodeInformer,
+				kubeClient:        clientset,
+				cloud:             fakeAws,
+				nodeMonitorPeriod: 1 * time.Second,
+				tags:              map[string]string{"key": "value"},
+				resources:         []string{"instance"},
+				workqueue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"),
+			}
 
 			w := eventBroadcaster.StartLogging(klog.Infof)
 			defer w.Stop()
 
-			testcase.taggingController.MonitorNodes(ctx)
-
-			if len(testcase.taggingController.currentNodes) != testcase.noOfToBeTaggedNodes || len(testcase.taggingController.totalNodes) != testcase.totalNodes {
-				t.Errorf("currentNodes must contain %d element(s), and totalNodes must contain %d element(s).", testcase.noOfToBeTaggedNodes, testcase.totalNodes)
-			}
+			tc.enqueueNode(testcase.currNode, true)
+			tc.MonitorNodes()
 
+			ec2, _ := awsServices.Compute("")
+			assert.EqualValues(t, testcase.expectedCalls, ec2.Calls,
+				"expected cloud provider methods `%v` to be called but `%v` was called ",
+				testcase.expectedCalls, ec2.Calls)
 		})
 	}
 }
diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go
index ef3969f8c4..fbaf1d4636 100644
--- a/pkg/providers/v1/aws_fakes.go
+++ b/pkg/providers/v1/aws_fakes.go
@@ -21,6 +21,8 @@ import (
 	"fmt"
 	"sort"
 	"strings"
+	"sync"
+	"time"
 
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/autoscaling"
@@ -48,6 +50,14 @@ type FakeAWSServices struct {
 	kms *FakeKMS
 }
 
+type FakeCloud struct {
+	cloud *Cloud
+
+	Calls        []string
+	addCallLock  sync.Mutex
+	RequestDelay time.Duration
+}
+
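The fields added above turn the fake into a call recorder, which is what the rewritten test asserts against. The pattern in isolation looks like this; the names are illustrative, not the fake's actual API:

package example

import (
	"fmt"
	"sync"
)

// callRecorder collects the names of methods invoked on a fake, guarded by
// a mutex since controllers may invoke the fake from several goroutines.
type callRecorder struct {
	mu    sync.Mutex
	calls []string
}

func (r *callRecorder) record(name string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.calls = append(r.calls, name)
}

// snapshot returns a copy so callers cannot race with later records.
func (r *callRecorder) snapshot() []string {
	r.mu.Lock()
	defer r.mu.Unlock()
	return append([]string(nil), r.calls...)
}

func demoRecorder() {
	var r callRecorder
	r.record("create-tags")
	r.record("delete-tags")
	fmt.Println(r.snapshot()) // [create-tags delete-tags]
}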
NewFakeAWSServices(clusterID string) *FakeAWSServices { s := &FakeAWSServices{} @@ -263,8 +273,8 @@ func (ec2i *FakeEC2Impl) RemoveSubnets() { ec2i.Subnets = ec2i.Subnets[:0] } -// CreateTags is not implemented but is required for interface conformance func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { + addCall("create-tags") for _, id := range input.Resources { if *id == "i-error" { return nil, errors.New("Unable to tag") @@ -273,8 +283,8 @@ func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTags return &ec2.CreateTagsOutput{}, nil } -// DeleteTags is not implemented but is required for interface conformance func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { + ec2i.addCall("delete-tags") for _, id := range input.Resources { if *id == "i-error" { return nil, errors.New("Unable to remove tag") @@ -283,6 +293,20 @@ func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTags return &ec2.DeleteTagsOutput{}, nil } +func (f *FakeCloud) addCall(desc string) { + f.addCallLock.Lock() + defer f.addCallLock.Unlock() + + time.Sleep(f.RequestDelay) + + f.Calls = append(f.Calls, desc) +} + +// ClearCalls clears internal record of method calls to this Cloud. +func (f *FakeCloud) ClearCalls() { + f.Calls = []string{} +} + // DescribeRouteTables returns fake route table descriptions func (ec2i *FakeEC2Impl) DescribeRouteTables(request *ec2.DescribeRouteTablesInput) ([]*ec2.RouteTable, error) { ec2i.DescribeRouteTablesInput = request From 121829dca1aa6870f93ed5f1be26bf885562ecf8 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Fri, 25 Mar 2022 18:36:58 -0700 Subject: [PATCH 27/40] Added in a loop to make sure all messages are processed before shutting down --- pkg/controllers/tagging/tagging_controller.go | 19 ++++++++++---- .../tagging/tagging_controller_test.go | 8 +----- pkg/providers/v1/aws_fakes.go | 26 ------------------- 3 files changed, 15 insertions(+), 38 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 46d66fa33b..a432b13787 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -101,17 +101,24 @@ func (tc *TaggingController) Run(stopCh <-chan struct{}) { defer tc.workqueue.ShutDown() klog.Infof("Starting the tagging controller") - go wait.Until(tc.MonitorNodes, tc.nodeMonitorPeriod, stopCh) + go wait.Until(tc.work, tc.nodeMonitorPeriod, stopCh) <-stopCh } -// MonitorNodes is a long-running function that continuously -// read and process a message on the work queue -func (tc *TaggingController) MonitorNodes() { +// work is a long-running function that continuously +// call process() for each message on the workqueue +func (tc *TaggingController) work() { + for tc.Process() { + } +} + +// Process reads each message in the queue and performs either +// tag or untag function on the Node object +func (tc *TaggingController) Process() bool { obj, shutdown := tc.workqueue.Get() if shutdown { - return + return false } err := func(obj interface{}) error { @@ -164,6 +171,8 @@ func (tc *TaggingController) MonitorNodes() { if err != nil { utilruntime.HandleError(err) } + + return true } // tagNodesResources tag node resources from a list of nodes diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index ceb12aa6d9..c58f9435ba 
100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -15,7 +15,6 @@ package tagging import ( "context" - "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" @@ -80,12 +79,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { defer w.Stop() tc.enqueueNode(testcase.currNode, true) - tc.MonitorNodes() - ec2, _ := awsServices.Compute("") - - assert.EqualValues(t, testcase.expectedCalls, ec2., - "expected cloud provider methods `%v` to be called but `%v` was called ", - testcase.expectedCalls, awsServices.MadeRequest.Calls) + tc.Process() }) } } diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go index fbaf1d4636..6f41679d06 100644 --- a/pkg/providers/v1/aws_fakes.go +++ b/pkg/providers/v1/aws_fakes.go @@ -21,8 +21,6 @@ import ( "fmt" "sort" "strings" - "sync" - "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/autoscaling" @@ -50,14 +48,6 @@ type FakeAWSServices struct { kms *FakeKMS } -type FakeCloud struct { - cloud *Cloud - - Calls []string - addCallLock sync.Mutex - RequestDelay time.Duration -} - // NewFakeAWSServices creates a new FakeAWSServices func NewFakeAWSServices(clusterID string) *FakeAWSServices { s := &FakeAWSServices{} @@ -274,7 +264,6 @@ func (ec2i *FakeEC2Impl) RemoveSubnets() { } func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { - addCall("create-tags") for _, id := range input.Resources { if *id == "i-error" { return nil, errors.New("Unable to tag") @@ -284,7 +273,6 @@ func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTags } func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { - ec2i.addCall("delete-tags") for _, id := range input.Resources { if *id == "i-error" { return nil, errors.New("Unable to remove tag") @@ -293,20 +281,6 @@ func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTags return &ec2.DeleteTagsOutput{}, nil } -func (f *FakeCloud) addCall(desc string) { - f.addCallLock.Lock() - defer f.addCallLock.Unlock() - - time.Sleep(f.RequestDelay) - - f.Calls = append(f.Calls, desc) -} - -// ClearCalls clears internal record of method calls to this Cloud. 
-func (f *FakeCloud) ClearCalls() { - f.Calls = []string{} -} - // DescribeRouteTables returns fake route table descriptions func (ec2i *FakeEC2Impl) DescribeRouteTables(request *ec2.DescribeRouteTablesInput) ([]*ec2.RouteTable, error) { ec2i.DescribeRouteTablesInput = request From cbfcf9d9c75a2ad43d13ac43afd1fc5c46c4923b Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Sat, 26 Mar 2022 13:04:43 -0700 Subject: [PATCH 28/40] Added more logging --- pkg/controllers/tagging/tagging_controller.go | 26 +++++--- .../tagging/tagging_controller_test.go | 60 +++++++++++++++++-- pkg/providers/v1/tags.go | 8 ++- 3 files changed, 79 insertions(+), 15 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index a432b13787..e193aedaa0 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -32,9 +32,9 @@ import ( ) const ( - // This is a prefix used to recognized if a node in the workqueue - // is to be tagged or not - tagKeyPrefix string = "tagKeyPrefix" + // This is a prefix used to recognized if a node + // in the workqueue is to be tagged or not + tagKeyPrefix string = "ToBeTagged:" ) // TaggingController is the controller implementation for tagging cluster resources. @@ -121,6 +121,8 @@ func (tc *TaggingController) Process() bool { return false } + klog.Infof("Starting to process %v", obj) + err := func(obj interface{}) error { defer tc.workqueue.Done(obj) @@ -145,6 +147,7 @@ func (tc *TaggingController) Process() bool { node, err := tc.nodeInformer.Lister().Get(nodeName) if err != nil { if apierrors.IsNotFound(err) { + klog.Errorf("Unable to find a node with name %s", nodeName) return nil } @@ -152,9 +155,10 @@ func (tc *TaggingController) Process() bool { } if toBeTagged { + key = tagKeyPrefix + key if err := tc.tagNodesResources(node); err != nil { // Put the item back on the workqueue to handle any transient errors. 
- tc.workqueue.AddRateLimited(tagKeyPrefix + key) + tc.workqueue.AddRateLimited(key) return fmt.Errorf("error tagging '%s': %s, requeuing", key, err.Error()) } } else { @@ -165,10 +169,12 @@ func (tc *TaggingController) Process() bool { } tc.workqueue.Forget(obj) + klog.Infof("Finished processing %v", obj) return nil }(obj) if err != nil { + klog.Errorf("Error occurred while processing %v", obj) utilruntime.HandleError(err) } @@ -208,6 +214,8 @@ func (tc *TaggingController) tagEc2Instance(node *v1.Node) error { } } + klog.Infof("Successfully tagged %s with %v", instanceId, tc.tags) + return nil } @@ -244,6 +252,8 @@ func (tc *TaggingController) untagEc2Instance(node *v1.Node) error { } } + klog.Infof("Successfully tagged %s with %v", instanceId, tc.tags) + return nil } @@ -258,10 +268,12 @@ func (tc *TaggingController) enqueueNode(obj interface{}, toBeTagged bool) { } if toBeTagged { - tc.workqueue.Add(tagKeyPrefix + key) - } else { - tc.workqueue.Add(key) + key = tagKeyPrefix + key } + + tc.workqueue.Add(key) + + klog.Infof("Added %s to the workqueue", key) } // getActionAndKey from the provided key, check if the object is to be tagged diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index c58f9435ba..5cda9fc03b 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -32,10 +32,25 @@ const TestClusterID = "clusterid.test" func Test_NodesJoiningAndLeaving(t *testing.T) { testcases := []struct { - name string - currNode *v1.Node - expectedCalls []string + name string + currNode *v1.Node + noOfItemLeft int + toBeTagged bool }{ + { + name: "node0 joins the cluster, but fail to tag.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-error", + }, + }, + noOfItemLeft: 1, + toBeTagged: true, + }, { name: "node0 joins the cluster.", currNode: &v1.Node{ @@ -44,10 +59,39 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), }, Spec: v1.NodeSpec{ - ProviderID: "i-00000", + ProviderID: "i-0001", + }, + }, + noOfItemLeft: 0, + toBeTagged: true, + }, + { + name: "node0 leaves the cluster, failed to tag.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-error", + }, + }, + noOfItemLeft: 1, + toBeTagged: false, + }, + { + name: "node0 leaves the cluster.", + currNode: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC), + }, + Spec: v1.NodeSpec{ + ProviderID: "i-0001", }, }, - expectedCalls: []string{"create-tags"}, + noOfItemLeft: 0, + toBeTagged: false, }, } @@ -78,8 +122,12 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { w := eventBroadcaster.StartLogging(klog.Infof) defer w.Stop() - tc.enqueueNode(testcase.currNode, true) + tc.enqueueNode(testcase.currNode, testcase.toBeTagged) tc.Process() + + if tc.workqueue.Len() != testcase.noOfItemLeft { + t.Fatalf("workqueue not processed properly, expected %d left, got %d.", testcase.noOfItemLeft, tc.workqueue.Len()) + } }) } } diff --git a/pkg/providers/v1/tags.go b/pkg/providers/v1/tags.go index bc66436c12..5ec40e81ce 100644 --- a/pkg/providers/v1/tags.go +++ b/pkg/providers/v1/tags.go @@ -317,13 +317,15 @@ func (c 
*Cloud) TagResource(resourceId string, tags map[string]string) error { Tags: buildAwsTags(tags), } - _, err := c.ec2.CreateTags(request) + output, err := c.ec2.CreateTags(request) if err != nil { klog.Errorf("Error occurred trying to tag resources, %v", err) return err } + klog.Infof("Done calling create-tags to EC2: %v", output) + return nil } @@ -333,13 +335,15 @@ func (c *Cloud) UntagResource(resourceId string, tags map[string]string) error { Tags: buildAwsTags(tags), } - _, err := c.ec2.DeleteTags(request) + output, err := c.ec2.DeleteTags(request) if err != nil { klog.Errorf("Error occurred trying to untag resources, %v", err) return err } + klog.Infof("Done calling delete-tags to EC2: %v", output) + return nil } From 75f68d74832d200db80f875c0c4df675f2b974e0 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Sat, 26 Mar 2022 15:53:27 -0700 Subject: [PATCH 29/40] Added more testing --- pkg/controllers/tagging/tagging_controller.go | 4 +- .../tagging/tagging_controller_test.go | 57 ++++++++++++------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index e193aedaa0..1d1b948bb9 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -26,7 +26,7 @@ import ( cloudprovider "k8s.io/cloud-provider" opt "k8s.io/cloud-provider-aws/pkg/controllers/options" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" - "k8s.io/klog/v2" + "k8s.io/klog" "strings" "time" ) @@ -252,7 +252,7 @@ func (tc *TaggingController) untagEc2Instance(node *v1.Node) error { } } - klog.Infof("Successfully tagged %s with %v", instanceId, tc.tags) + klog.Infof("Successfully untagged %s with %v", instanceId, tc.tags) return nil } diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 5cda9fc03b..6b5268f50e 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -14,16 +14,19 @@ limitations under the License. 
package tagging import ( + "bytes" "context" + "flag" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/informers" coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes/fake" - "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" - "k8s.io/klog/v2" + "k8s.io/klog" + "os" + "strings" "testing" "time" ) @@ -31,11 +34,13 @@ import ( const TestClusterID = "clusterid.test" func Test_NodesJoiningAndLeaving(t *testing.T) { + klog.InitFlags(nil) + flag.CommandLine.Parse([]string{"--logtostderr=false"}) testcases := []struct { - name string - currNode *v1.Node - noOfItemLeft int - toBeTagged bool + name string + currNode *v1.Node + toBeTagged bool + expectedMessages []string }{ { name: "node0 joins the cluster, but fail to tag.", @@ -48,8 +53,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { ProviderID: "i-error", }, }, - noOfItemLeft: 1, - toBeTagged: true, + toBeTagged: true, + expectedMessages: []string{"Error occurred while processing ToBeTagged:node0"}, }, { name: "node0 joins the cluster.", @@ -62,11 +67,11 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { ProviderID: "i-0001", }, }, - noOfItemLeft: 0, - toBeTagged: true, + toBeTagged: true, + expectedMessages: []string{"Successfully tagged i-0001"}, }, { - name: "node0 leaves the cluster, failed to tag.", + name: "node0 leaves the cluster, failed to untag.", currNode: &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node0", @@ -76,8 +81,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { ProviderID: "i-error", }, }, - noOfItemLeft: 1, - toBeTagged: false, + toBeTagged: false, + expectedMessages: []string{"Error in untagging EC2 instance for node node0"}, }, { name: "node0 leaves the cluster.", @@ -90,8 +95,8 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { ProviderID: "i-0001", }, }, - noOfItemLeft: 0, - toBeTagged: false, + toBeTagged: false, + expectedMessages: []string{"Successfully untagged i-0001"}, }, } @@ -100,6 +105,12 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { for _, testcase := range testcases { t.Run(testcase.name, func(t *testing.T) { + var logBuf bytes.Buffer + klog.SetOutput(&logBuf) + defer func() { + klog.SetOutput(os.Stderr) + }() + clientset := fake.NewSimpleClientset(testcase.currNode) informer := informers.NewSharedInformerFactory(clientset, time.Second) nodeInformer := informer.Core().V1().Nodes() @@ -108,7 +119,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { t.Errorf("unexpected error: %v", err) } - eventBroadcaster := record.NewBroadcaster() + //eventBroadcaster := record.NewBroadcaster() tc := &TaggingController{ nodeInformer: nodeInformer, kubeClient: clientset, @@ -119,14 +130,18 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"), } - w := eventBroadcaster.StartLogging(klog.Infof) - defer w.Stop() - tc.enqueueNode(testcase.currNode, testcase.toBeTagged) tc.Process() - if tc.workqueue.Len() != testcase.noOfItemLeft { - t.Fatalf("workqueue not processed properly, expected %d left, got %d.", testcase.noOfItemLeft, tc.workqueue.Len()) + for _, msg := range testcase.expectedMessages { + if !strings.Contains(logBuf.String(), msg) { + t.Errorf("\nMsg %q not found in log: \n%v\n", msg, logBuf.String()) + } + if strings.Contains(logBuf.String(), "error tagging ") || strings.Contains(logBuf.String(), "error untagging ") { + if !strings.Contains(logBuf.String(), ", requeuing") 
{ + t.Errorf("\nFailed to tag or untag but logs do not contain 'requeueing': \n%v\n", logBuf.String()) + } + } } }) } From 78b6a3048fe5af03b35c1cf8ed7ba759837ae960 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 28 Mar 2022 11:12:28 -0700 Subject: [PATCH 30/40] cosmetic change --- pkg/controllers/options/tagging_controller.go | 2 +- pkg/controllers/tagging/tagging_controller.go | 2 +- pkg/controllers/tagging/tagging_controller_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go index 52cc6593aa..e3be07dfc5 100644 --- a/pkg/controllers/options/tagging_controller.go +++ b/pkg/controllers/options/tagging_controller.go @@ -30,7 +30,7 @@ func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) { func (o *TaggingControllerOptions) Validate() error { if len(o.Tags) == 0 { - return fmt.Errorf("--tags must not be empty and must be a form of key:value") + return fmt.Errorf("--tags must not be empty and must be a form of key=value") } if len(o.Resources) == 0 { diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index 1d1b948bb9..d2cb528144 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -26,7 +26,7 @@ import ( cloudprovider "k8s.io/cloud-provider" opt "k8s.io/cloud-provider-aws/pkg/controllers/options" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" - "k8s.io/klog" + "k8s.io/klog/v2" "strings" "time" ) diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 6b5268f50e..0dceefa13d 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -24,7 +24,7 @@ import ( "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/util/workqueue" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" - "k8s.io/klog" + "k8s.io/klog/v2" "os" "strings" "testing" From 101e3f119dde64a558debcd5fa948046760e162e Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 28 Mar 2022 12:24:50 -0700 Subject: [PATCH 31/40] use array instead of map for supported resources --- pkg/controllers/options/resources.go | 4 ++-- pkg/controllers/options/tagging_controller.go | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pkg/controllers/options/resources.go b/pkg/controllers/options/resources.go index 381fcde6f0..df0ee14c83 100644 --- a/pkg/controllers/options/resources.go +++ b/pkg/controllers/options/resources.go @@ -16,6 +16,6 @@ const ( Instance string = "instance" ) -var SupportedResources = map[string]string{ - "instance": Instance, +var SupportedResources = []string{ + Instance, } diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go index e3be07dfc5..d79a7bf265 100644 --- a/pkg/controllers/options/tagging_controller.go +++ b/pkg/controllers/options/tagging_controller.go @@ -38,12 +38,16 @@ func (o *TaggingControllerOptions) Validate() error { } for _, r := range o.Resources { - if _, ok := SupportedResources[r]; !ok { - resources := []string{} - for r, _ := range SupportedResources { - resources = append(resources, r) + found := false + + for _, resource := range SupportedResources { + if r == resource { + found = true } - return fmt.Errorf("%s is not a supported resource. 
Current supported resources %v", r, resources) + } + + if !found { + return fmt.Errorf("%s is not a supported resource. Current supported resources %v", r, SupportedResources) } } From 58596908b50ddd923cffd50126382bacaf8c01ca Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Mon, 28 Mar 2022 14:28:51 -0700 Subject: [PATCH 32/40] Reworked the workqueue with workitem --- pkg/controllers/tagging/tagging_controller.go | 109 ++++++------------ .../tagging/tagging_controller_test.go | 8 +- 2 files changed, 40 insertions(+), 77 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go index d2cb528144..21c713b5e0 100644 --- a/pkg/controllers/tagging/tagging_controller.go +++ b/pkg/controllers/tagging/tagging_controller.go @@ -16,7 +16,6 @@ package tagging import ( "fmt" v1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" coreinformers "k8s.io/client-go/informers/core/v1" @@ -27,15 +26,14 @@ import ( opt "k8s.io/cloud-provider-aws/pkg/controllers/options" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/klog/v2" - "strings" "time" ) -const ( - // This is a prefix used to recognized if a node - // in the workqueue is to be tagged or not - tagKeyPrefix string = "ToBeTagged:" -) +// workItem contains the node and an action for that node +type workItem struct { + node *v1.Node + action func(node *v1.Node) error +} // TaggingController is the controller implementation for tagging cluster resources. // It periodically check for Node events (creating/deleting) to apply appropriate @@ -45,7 +43,7 @@ type TaggingController struct { kubeClient clientset.Interface cloud *awsv1.Cloud workqueue workqueue.RateLimitingInterface - + nodesSynced cache.InformerSynced // Value controlling TaggingController monitoring period, i.e. how often does TaggingController // check node list. This value should be lower than nodeMonitorGracePeriod // set in controller-manager @@ -81,14 +79,15 @@ func NewTaggingController( tags: tags, resources: resources, workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"), + nodesSynced: nodeInformer.Informer().HasSynced, } // Use shared informer to listen to add/update/delete of nodes. 
Note that any nodes // that exist before tagging controller starts will show up in the update method tc.nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { tc.enqueueNode(obj, true) }, - UpdateFunc: func(oldObj, newObj interface{}) { tc.enqueueNode(newObj, true) }, - DeleteFunc: func(obj interface{}) { tc.enqueueNode(obj, false) }, + AddFunc: func(obj interface{}) { tc.enqueueNode(obj, tc.tagNodesResources) }, + UpdateFunc: func(oldObj, newObj interface{}) { tc.enqueueNode(newObj, tc.tagNodesResources) }, + DeleteFunc: func(obj interface{}) { tc.enqueueNode(obj, tc.untagNodeResources) }, }) return tc, nil @@ -100,6 +99,13 @@ func (tc *TaggingController) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer tc.workqueue.ShutDown() + // Wait for the caches to be synced before starting workers + klog.Info("Waiting for informer caches to sync") + if ok := cache.WaitForCacheSync(stopCh, tc.nodesSynced); !ok { + klog.Errorf("failed to wait for caches to sync") + return + } + klog.Infof("Starting the tagging controller") go wait.Until(tc.work, tc.nodeMonitorPeriod, stopCh) @@ -126,50 +132,22 @@ func (tc *TaggingController) Process() bool { err := func(obj interface{}) error { defer tc.workqueue.Done(obj) - var key string - var ok bool - if key, ok = obj.(string); !ok { + workItem, ok := obj.(*workItem) + if !ok { tc.workqueue.Forget(obj) - utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj)) + utilruntime.HandleError(fmt.Errorf("expected workItem in workqueue but got %#v", obj)) return nil } - var toBeTagged bool - toBeTagged, key = tc.getActionAndKey(key) - - _, nodeName, err := cache.SplitMetaNamespaceKey(key) - + err := workItem.action(workItem.node) if err != nil { - utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key)) - return nil - } - - node, err := tc.nodeInformer.Lister().Get(nodeName) - if err != nil { - if apierrors.IsNotFound(err) { - klog.Errorf("Unable to find a node with name %s", nodeName) - return nil - } - - return err - } - - if toBeTagged { - key = tagKeyPrefix + key - if err := tc.tagNodesResources(node); err != nil { - // Put the item back on the workqueue to handle any transient errors. - tc.workqueue.AddRateLimited(key) - return fmt.Errorf("error tagging '%s': %s, requeuing", key, err.Error()) - } - } else { - if err := tc.untagNodeResources(node); err != nil { - tc.workqueue.AddRateLimited(key) - return fmt.Errorf("error untagging '%s': %s, requeuing", key, err.Error()) - } + // Put the item back on the workqueue to handle any transient errors. 
+ tc.workqueue.AddRateLimited(workItem) + return fmt.Errorf("error finishing work item '%v': %s, requeuing", workItem, err.Error()) } tc.workqueue.Forget(obj) - klog.Infof("Finished processing %v", obj) + klog.Infof("Finished processing %v", workItem) return nil }(obj) @@ -257,33 +235,14 @@ func (tc *TaggingController) untagEc2Instance(node *v1.Node) error { return nil } -// enqueueNode takes in the object to enqueue to the workqueue and whether -// the object is to be tagged -func (tc *TaggingController) enqueueNode(obj interface{}, toBeTagged bool) { - var key string - var err error - if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { - utilruntime.HandleError(err) - return - } - - if toBeTagged { - key = tagKeyPrefix + key - } - - tc.workqueue.Add(key) - - klog.Infof("Added %s to the workqueue", key) -} - -// getActionAndKey from the provided key, check if the object is to be tagged -// and extract that action together with the key -func (tc *TaggingController) getActionAndKey(key string) (bool, string) { - toBeTagged := false - if strings.HasPrefix(key, tagKeyPrefix) { - toBeTagged = true - key = strings.TrimPrefix(key, tagKeyPrefix) +// enqueueNode takes in the object and an +// action for the object for a workitem and enqueue to the workqueue +func (tc *TaggingController) enqueueNode(obj interface{}, action func(node *v1.Node) error) { + node := obj.(*v1.Node) + item := &workItem{ + node: node, + action: action, } - - return toBeTagged, key + tc.workqueue.Add(item) + klog.Infof("Added %s to the workqueue", item) } diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 0dceefa13d..253f869803 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -54,7 +54,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { }, }, toBeTagged: true, - expectedMessages: []string{"Error occurred while processing ToBeTagged:node0"}, + expectedMessages: []string{"Error occurred while processing"}, }, { name: "node0 joins the cluster.", @@ -130,7 +130,11 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"), } - tc.enqueueNode(testcase.currNode, testcase.toBeTagged) + if testcase.toBeTagged { + tc.enqueueNode(testcase.currNode, tc.tagNodesResources) + } else { + tc.enqueueNode(testcase.currNode, tc.untagNodeResources) + } tc.Process() for _, msg := range testcase.expectedMessages { From d4a1ce66442ecd0784191d45e51af1b9a6b654b9 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 29 Mar 2022 13:19:05 -0700 Subject: [PATCH 33/40] Addressed comments --- pkg/controllers/options/tagging_controller.go | 2 +- pkg/controllers/tagging/tagging_controller.go | 6 +++--- pkg/controllers/tagging/tagging_controller_test.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go index d79a7bf265..e25c8169e0 100644 --- a/pkg/controllers/options/tagging_controller.go +++ b/pkg/controllers/options/tagging_controller.go @@ -24,7 +24,7 @@ type TaggingControllerOptions struct { } func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) { - fs.StringToStringVar(&o.Tags, "tags", o.Tags, "Tags to apply to AWS resources in the tagging controller.") + fs.StringToStringVar(&o.Tags, "tags", o.Tags, "Tags to apply to AWS resources in the 
tagging controller, in a form of key=value.")
 	fs.StringArrayVar(&o.Resources, "resources", o.Resources, "AWS resources name to add/remove tags in the tagging controller.")
 }
 
diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go
index 21c713b5e0..282f01ac90 100644
--- a/pkg/controllers/tagging/tagging_controller.go
+++ b/pkg/controllers/tagging/tagging_controller.go
@@ -115,13 +115,13 @@ func (tc *TaggingController) Run(stopCh <-chan struct{}) {
 
 // work is a long-running function that continuously
 // call process() for each message on the workqueue
 func (tc *TaggingController) work() {
-	for tc.Process() {
+	for tc.process() {
 	}
 }
 
-// Process reads each message in the queue and performs either
+// process reads each message in the queue and performs either
 // tag or untag function on the Node object
-func (tc *TaggingController) Process() bool {
+func (tc *TaggingController) process() bool {
 	obj, shutdown := tc.workqueue.Get()
 	if shutdown {
 		return false
diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go
index 253f869803..7ee4404d5d 100644
--- a/pkg/controllers/tagging/tagging_controller_test.go
+++ b/pkg/controllers/tagging/tagging_controller_test.go
@@ -135,7 +135,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) {
 			} else {
 				tc.enqueueNode(testcase.currNode, tc.untagNodeResources)
 			}
-			tc.Process()
+			tc.process()

From f094e381f6ff04f0dccf223e93ed227d26b4944e Mon Sep 17 00:00:00 2001
From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com>
Date: Tue, 29 Mar 2022 13:25:56 -0700
Subject: [PATCH 34/40] addressed verify-lint errors

---
 pkg/providers/v1/tags.go | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pkg/providers/v1/tags.go b/pkg/providers/v1/tags.go
index 5ec40e81ce..01512ec3a5 100644
--- a/pkg/providers/v1/tags.go
+++ b/pkg/providers/v1/tags.go
@@ -311,9 +311,11 @@ func (t *awsTagging) clusterID() string {
 	return t.ClusterID
 }
 
-func (c *Cloud) TagResource(resourceId string, tags map[string]string) error {
+// TagResource calls EC2 and applies the supplied tags to the resource
+// associated with resourceID
+func (c *Cloud) TagResource(resourceID string, tags map[string]string) error {
 	request := &ec2.CreateTagsInput{
-		Resources: []*string{aws.String(resourceId)},
+		Resources: []*string{aws.String(resourceID)},
 		Tags:      buildAwsTags(tags),
 	}
 
@@ -329,9 +331,11 @@ func (c *Cloud) TagResource(resourceId string, tags map[string]string) error {
 	return nil
 }
 
-func (c *Cloud) UntagResource(resourceId string, tags map[string]string) error {
+// UntagResource calls EC2 and removes the supplied tags from the resource
+// associated with resourceID
+func (c *Cloud) UntagResource(resourceID string, tags map[string]string) error {
 	request := &ec2.DeleteTagsInput{
-		Resources: []*string{aws.String(resourceId)},
+		Resources: []*string{aws.String(resourceID)},
 		Tags:      buildAwsTags(tags),
 	}
 
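For reference, the TagResource and UntagResource methods documented in the patch above are thin wrappers around the aws-sdk-go EC2 tagging calls. The following is a minimal, self-contained sketch of the underlying API usage; the region, instance ID, and tag key/value are placeholders rather than values taken from these patches:

    package main

    import (
    	"fmt"

    	"github.com/aws/aws-sdk-go/aws"
    	"github.com/aws/aws-sdk-go/aws/session"
    	"github.com/aws/aws-sdk-go/service/ec2"
    )

    func main() {
    	// Credentials and region come from the usual SDK chain
    	// (environment variables, shared config, or instance profile).
    	sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
    	svc := ec2.New(sess)

    	// ec2:CreateTags overwrites the value of an existing key, which is
    	// why re-applying the user-provided tags is idempotent.
    	_, err := svc.CreateTags(&ec2.CreateTagsInput{
    		Resources: []*string{aws.String("i-0123456789abcdef0")}, // placeholder instance ID
    		Tags:      []*ec2.Tag{{Key: aws.String("example-key"), Value: aws.String("example-value")}},
    	})
    	if err != nil {
    		fmt.Println("create-tags failed:", err)
    	}

    	// ec2:DeleteTags is the inverse call used by UntagResource; supplying
    	// the value as well deletes the tag only if the value still matches.
    	_, err = svc.DeleteTags(&ec2.DeleteTagsInput{
    		Resources: []*string{aws.String("i-0123456789abcdef0")},
    		Tags:      []*ec2.Tag{{Key: aws.String("example-key"), Value: aws.String("example-value")}},
    	})
    	if err != nil {
    		fmt.Println("delete-tags failed:", err)
    	}
    }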
From c669f82d859de0816b263b7d101ff9f3bd142058 Mon Sep 17 00:00:00 2001
From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com>
Date: Tue, 29 Mar 2022 13:33:12 -0700
Subject: [PATCH 35/40] addressed comments and verify-lint

---
 pkg/controllers/options/tagging_controller.go         |  3 +--
 pkg/controllers/tagging/tagging_controller_wrapper.go |  6 +++---
 pkg/providers/v1/aws.go                               |  1 +
 pkg/providers/v1/aws_fakes.go                         |  2 ++
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go
index e25c8169e0..f182bd6adb 100644
--- a/pkg/controllers/options/tagging_controller.go
+++ b/pkg/controllers/options/tagging_controller.go
@@ -38,17 +38,16 @@ func (o *TaggingControllerOptions) Validate() error {
 	}
 
 	for _, r := range o.Resources {
 		found := false
-
 		for _, resource := range SupportedResources {
 			if r == resource {
 				found = true
+				break
 			}
 		}
-
 		if !found {
 			return fmt.Errorf("%s is not a supported resource. Current supported resources %v", r, SupportedResources)
 		}
 	}
 
 	return nil
diff --git a/pkg/controllers/tagging/tagging_controller_wrapper.go b/pkg/controllers/tagging/tagging_controller_wrapper.go
index f250ab78f0..cef64d5bef 100644
--- a/pkg/controllers/tagging/tagging_controller_wrapper.go
+++ b/pkg/controllers/tagging/tagging_controller_wrapper.go
@@ -18,18 +18,18 @@ const (
 	TaggingControllerClientName = "tagging-controller"
 	TaggingControllerKey        = "tagging"
 )
 
-type TaggingControllerWrapper struct {
+type ControllerWrapper struct {
 	Options options.TaggingControllerOptions
 }
 
 // StartTaggingControllerWrapper is used to take cloud config as input and start the tagging controller
-func (tc *TaggingControllerWrapper) StartTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc {
+func (tc *ControllerWrapper) StartTaggingControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc {
 	return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
 		return tc.startTaggingController(ctx, initContext, completedConfig, cloud)
 	}
 }
 
-func (tc *TaggingControllerWrapper) startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
+func (tc *ControllerWrapper) startTaggingController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
 	err := tc.Options.Validate()
 	if err != nil {
 		klog.Fatalf("Tagging controller inputs are not properly set: %v", err)
 	}
 
-	// Start the TaggingController
+	// Start the Controller
 	taggingcontroller, err := NewTaggingController(
 		completedConfig.SharedInformers.Core().V1().Nodes(),
 		completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName),
diff --git a/pkg/providers/v1/aws.go b/pkg/providers/v1/aws.go
index b6f6ede506..82ac34e580 100644
--- a/pkg/providers/v1/aws.go
+++ b/pkg/providers/v1/aws.go
@@ -1417,6 +1417,7 @@ func newAWSCloud(cfg CloudConfig, awsServices Services) (*Cloud, error) {
 	return awsCloud, nil
 }
 
+// NewAWSCloud calls newAWSCloud and returns a new AWS Cloud built from the supplied configuration
 func NewAWSCloud(cfg CloudConfig, awsServices Services) (*Cloud, error) {
 	return newAWSCloud(cfg, awsServices)
 }
diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go
index 6f41679d06..acd21f7539 100644
--- a/pkg/providers/v1/aws_fakes.go
+++ b/pkg/providers/v1/aws_fakes.go
@@ -263,6 +263,7 @@ func (ec2i *FakeEC2Impl) RemoveSubnets() {
 	ec2i.Subnets = ec2i.Subnets[:0]
 }
 
+// Mock CreateTags from EC2
 func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput)
(*ec2.CreateTags
 	return &ec2.CreateTagsOutput{}, nil
 }
 
+// Mock DeleteTags from EC2
 func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) {
 	for _, id := range input.Resources {
 		if *id == "i-error" {

From f54a5b2ed043ebc237f24e4350206813ad8af562 Mon Sep 17 00:00:00 2001
From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com>
Date: Tue, 29 Mar 2022 13:41:08 -0700
Subject: [PATCH 36/40] address validate-lint error

---
 pkg/controllers/options/resources.go          |  3 +
 pkg/controllers/options/tagging_controller.go |  4 ++
 pkg/controllers/tagging/tagging_controller.go | 58 +++++++++----------
 .../tagging/tagging_controller_test.go        |  2 +-
 .../tagging/tagging_controller_wrapper.go     |  7 ++-
 5 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/pkg/controllers/options/resources.go b/pkg/controllers/options/resources.go
index df0ee14c83..0c7201b5f5 100644
--- a/pkg/controllers/options/resources.go
+++ b/pkg/controllers/options/resources.go
@@ -10,12 +10,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
+
 package options
 
 const (
+	// Instance is the string literal "instance"
 	Instance string = "instance"
 )
 
+// SupportedResources contains the resources that can be tagged by the controller at the moment
 var SupportedResources = []string{
 	Instance,
 }
diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go
index f182bd6adb..3c1de76724 100644
--- a/pkg/controllers/options/tagging_controller.go
+++ b/pkg/controllers/options/tagging_controller.go
@@ -18,16 +18,20 @@ import (
 	"github.com/spf13/pflag"
 )
 
+// TaggingControllerOptions contains the inputs that can
+// be used in the tagging controller
 type TaggingControllerOptions struct {
 	Tags      map[string]string
 	Resources []string
 }
 
+// AddFlags adds the additional flags for the controller
 func (o *TaggingControllerOptions) AddFlags(fs *pflag.FlagSet) {
 	fs.StringToStringVar(&o.Tags, "tags", o.Tags, "Tags to apply to AWS resources in the tagging controller, in a form of key=value.")
 	fs.StringArrayVar(&o.Resources, "resources", o.Resources, "AWS resources name to add/remove tags in the tagging controller.")
 }
 
+// Validate checks for errors from user input
 func (o *TaggingControllerOptions) Validate() error {
 	if len(o.Tags) == 0 {
 		return fmt.Errorf("--tags must not be empty and must be a form of key=value")
diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go
index 282f01ac90..fc84f2b5ae 100644
--- a/pkg/controllers/tagging/tagging_controller.go
+++ b/pkg/controllers/tagging/tagging_controller.go
@@ -35,16 +35,16 @@ type workItem struct {
 	action func(node *v1.Node) error
 }
 
-// TaggingController is the controller implementation for tagging cluster resources.
+// Controller is the controller implementation for tagging cluster resources.
 // It periodically check for Node events (creating/deleting) to apply appropriate
 // tags to resources.
-type TaggingController struct {
+type Controller struct {
 	nodeInformer coreinformers.NodeInformer
 	kubeClient   clientset.Interface
 	cloud        *awsv1.Cloud
 	workqueue    workqueue.RateLimitingInterface
 	nodesSynced  cache.InformerSynced
-	// Value controlling TaggingController monitoring period, i.e. how often does TaggingController
+	// Value controlling Controller monitoring period, i.e. how often does Controller
 	// check node list.
This value should be lower than nodeMonitorGracePeriod // set in controller-manager nodeMonitorPeriod time.Duration @@ -63,7 +63,7 @@ func NewTaggingController( cloud cloudprovider.Interface, nodeMonitorPeriod time.Duration, tags map[string]string, - resources []string) (*TaggingController, error) { + resources []string) (*Controller, error) { awsCloud, ok := cloud.(*awsv1.Cloud) if !ok { @@ -71,7 +71,7 @@ func NewTaggingController( return nil, err } - tc := &TaggingController{ + tc := &Controller{ nodeInformer: nodeInformer, kubeClient: kubeClient, cloud: awsCloud, @@ -95,7 +95,7 @@ func NewTaggingController( // Run will start the controller to tag resources attached to the cluster // and untag resources detached from the cluster. -func (tc *TaggingController) Run(stopCh <-chan struct{}) { +func (tc *Controller) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer tc.workqueue.ShutDown() @@ -114,14 +114,14 @@ func (tc *TaggingController) Run(stopCh <-chan struct{}) { // work is a long-running function that continuously // call process() for each message on the workqueue -func (tc *TaggingController) work() { +func (tc *Controller) work() { for tc.process() { } } // process reads each message in the queue and performs either // tag or untag function on the Node object -func (tc *TaggingController) process() bool { +func (tc *Controller) process() bool { obj, shutdown := tc.workqueue.Get() if shutdown { return false @@ -161,7 +161,7 @@ func (tc *TaggingController) process() bool { // tagNodesResources tag node resources from a list of nodes // If we want to tag more resources, modify this function appropriately -func (tc *TaggingController) tagNodesResources(node *v1.Node) error { +func (tc *Controller) tagNodesResources(node *v1.Node) error { for _, resource := range tc.resources { switch resource { case opt.Instance: @@ -177,29 +177,29 @@ func (tc *TaggingController) tagNodesResources(node *v1.Node) error { // tagEc2Instances applies the provided tags to each EC2 instance in // the cluster. -func (tc *TaggingController) tagEc2Instance(node *v1.Node) error { - instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() +func (tc *Controller) tagEc2Instance(node *v1.Node) error { + instanceID, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() if err != nil { klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) return err - } else { - err := tc.cloud.TagResource(string(instanceId), tc.tags) + } - if err != nil { - klog.Errorf("Error in tagging EC2 instance for node %s, error: %v", node.GetName(), err) - return err - } + err = tc.cloud.TagResource(string(instanceID), tc.tags) + + if err != nil { + klog.Errorf("Error in tagging EC2 instance for node %s, error: %v", node.GetName(), err) + return err } - klog.Infof("Successfully tagged %s with %v", instanceId, tc.tags) + klog.Infof("Successfully tagged %s with %v", instanceID, tc.tags) return nil } // untagNodeResources untag node resources from a list of nodes // If we want to untag more resources, modify this function appropriately -func (tc *TaggingController) untagNodeResources(node *v1.Node) error { +func (tc *Controller) untagNodeResources(node *v1.Node) error { for _, resource := range tc.resources { switch resource { case opt.Instance: @@ -215,29 +215,29 @@ func (tc *TaggingController) untagNodeResources(node *v1.Node) error { // untagEc2Instances deletes the provided tags to each EC2 instances in // the cluster. 
-func (tc *TaggingController) untagEc2Instance(node *v1.Node) error { - instanceId, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() +func (tc *Controller) untagEc2Instance(node *v1.Node) error { + instanceID, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() if err != nil { klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err) return err - } else { - err := tc.cloud.UntagResource(string(instanceId), tc.tags) + } - if err != nil { - klog.Errorf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) - return err - } + err = tc.cloud.UntagResource(string(instanceID), tc.tags) + + if err != nil { + klog.Errorf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err) + return err } - klog.Infof("Successfully untagged %s with %v", instanceId, tc.tags) + klog.Infof("Successfully untagged %s with %v", instanceID, tc.tags) return nil } // enqueueNode takes in the object and an // action for the object for a workitem and enqueue to the workqueue -func (tc *TaggingController) enqueueNode(obj interface{}, action func(node *v1.Node) error) { +func (tc *Controller) enqueueNode(obj interface{}, action func(node *v1.Node) error) { node := obj.(*v1.Node) item := &workItem{ node: node, diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go index 7ee4404d5d..8f35810b30 100644 --- a/pkg/controllers/tagging/tagging_controller_test.go +++ b/pkg/controllers/tagging/tagging_controller_test.go @@ -120,7 +120,7 @@ func Test_NodesJoiningAndLeaving(t *testing.T) { } //eventBroadcaster := record.NewBroadcaster() - tc := &TaggingController{ + tc := &Controller{ nodeInformer: nodeInformer, kubeClient: clientset, cloud: fakeAws, diff --git a/pkg/controllers/tagging/tagging_controller_wrapper.go b/pkg/controllers/tagging/tagging_controller_wrapper.go index cef64d5bef..1734fb454e 100644 --- a/pkg/controllers/tagging/tagging_controller_wrapper.go +++ b/pkg/controllers/tagging/tagging_controller_wrapper.go @@ -14,8 +14,11 @@ import ( ) const ( + // TaggingControllerClientName is the name of the tagging controller TaggingControllerClientName = "tagging-controller" - TaggingControllerKey = "tagging" + + // TaggingControllerKey is the key used to register this controller + TaggingControllerKey = "tagging" ) type ControllerWrapper struct { @@ -35,7 +38,7 @@ func (tc *ControllerWrapper) startTaggingController(ctx context.Context, initCon klog.Fatalf("Tagging controller inputs are not properly set: %v", err) } - // Start the TaggingController + // Start the Controller taggingcontroller, err := NewTaggingController( completedConfig.SharedInformers.Core().V1().Nodes(), completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName), From 12ac5eec72682ce7e9f1e7f029b455baea40af1f Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 29 Mar 2022 13:45:27 -0700 Subject: [PATCH 37/40] missed a couple more lint errors --- pkg/controllers/tagging/tagging_controller_wrapper.go | 1 + pkg/providers/v1/aws_fakes.go | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/controllers/tagging/tagging_controller_wrapper.go b/pkg/controllers/tagging/tagging_controller_wrapper.go index 1734fb454e..f5d376b482 100644 --- a/pkg/controllers/tagging/tagging_controller_wrapper.go +++ b/pkg/controllers/tagging/tagging_controller_wrapper.go @@ -21,6 +21,7 @@ const ( TaggingControllerKey = 
"tagging" ) +// ControllerWrapper is the wrapper for the tagging controller type ControllerWrapper struct { Options options.TaggingControllerOptions } diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go index acd21f7539..dee83d35b7 100644 --- a/pkg/providers/v1/aws_fakes.go +++ b/pkg/providers/v1/aws_fakes.go @@ -263,7 +263,7 @@ func (ec2i *FakeEC2Impl) RemoveSubnets() { ec2i.Subnets = ec2i.Subnets[:0] } -// Mock CreateTags from EC2 +// CreateTags is a mock for CreateTags from EC2 func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) { for _, id := range input.Resources { if *id == "i-error" { @@ -273,7 +273,7 @@ func (ec2i *FakeEC2Impl) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTags return &ec2.CreateTagsOutput{}, nil } -// Mock DeleteTags from EC2 +// DeleteTags is a mock for DeleteTags from EC2 func (ec2i *FakeEC2Impl) DeleteTags(input *ec2.DeleteTagsInput) (*ec2.DeleteTagsOutput, error) { for _, id := range input.Resources { if *id == "i-error" { From 4073667293ac1f2177934bb1d7c019081fba63ee Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 29 Mar 2022 13:50:12 -0700 Subject: [PATCH 38/40] Updated doc to be clearer --- docs/tagging_controller.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tagging_controller.md b/docs/tagging_controller.md index 74161ab969..be4bd86c2e 100644 --- a/docs/tagging_controller.md +++ b/docs/tagging_controller.md @@ -1,6 +1,6 @@ # The Tagging Controller -The tagging controller is responsible for tagging and untagging node resources when they join and leave the cluster, respectively. It can add and remove tags based on user input. Unlike the existing controllers, the tagging controller works exclusively with AWS. The AWS APIs it uses are `ec2:CreateTags` and `ec2:DeleteTags`. +The tagging controller is responsible for tagging and untagging node resources when they join and leave the cluster, respectively. It can add and remove tags based on user input. Additionally, if a tag is updated, it would leave the updated tag and reapply the user-provided tag. Unlike the existing controllers, the tagging controller works exclusively with AWS. The AWS APIs it uses are `ec2:CreateTags` and `ec2:DeleteTags`. | Flag | Valid Values | Default | Description | |------| --- | --- | --- | From 1bf899d0f6c8b9ea806258ca23526debc4d9bd29 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com> Date: Tue, 29 Mar 2022 18:26:19 -0700 Subject: [PATCH 39/40] Add TODOs for e2e testing and non-retryable workitem --- docs/TODO.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 docs/TODO.md diff --git a/docs/TODO.md b/docs/TODO.md new file mode 100644 index 0000000000..af4b88e4f7 --- /dev/null +++ b/docs/TODO.md @@ -0,0 +1,22 @@ +# TODO + +### Prereqs + +* Document required instance tags (i.e. KubernetesCluster:) + +### Load Balancers + +* document all available label/annotations to configure ELBs/NLBs for Service Type=LoadBalancer + +### Known Limitations + +* Document limitation with hostname / private DNS? + +### Kops + +* Add a full example (ideally with IAM roles) + +### Tagging Controller + +* Add e2e testing which enables the controller, and monitors if the resources are tagged properly +* Handle the case where potential non-retryable errors are enqeueued indefinitely. 
From 32d63ccaa30d42a3521e2eb1e56f35898149aced Mon Sep 17 00:00:00 2001
From: Nguyen Dinh <92940366+nguyenkndinh@users.noreply.github.com>
Date: Tue, 5 Apr 2022 16:38:32 -0700
Subject: [PATCH 40/40] Stop retrying failed workitem after a certain number
 of retries; add metrics

---
 docs/TODO.md                                  |  3 +-
 pkg/controllers/tagging/metrics.go            | 56 ++++++++++++
 pkg/controllers/tagging/tagging_controller.go | 91 +++++++++++++------
 .../tagging/tagging_controller_test.go        | 19 +++-
 4 files changed, 135 insertions(+), 34 deletions(-)
 create mode 100644 pkg/controllers/tagging/metrics.go

diff --git a/docs/TODO.md b/docs/TODO.md
index af4b88e4f7..5013e2bdcb 100644
--- a/docs/TODO.md
+++ b/docs/TODO.md
@@ -18,5 +18,4 @@
 
 ### Tagging Controller
 
-* Add e2e testing which enables the controller, and monitors if the resources are tagged properly
-* Handle the case where potential non-retryable errors are enqueued indefinitely.
\ No newline at end of file
+* Add e2e testing which enables the controller, and monitors if the resources are tagged properly
\ No newline at end of file
diff --git a/pkg/controllers/tagging/metrics.go b/pkg/controllers/tagging/metrics.go
new file mode 100644
index 0000000000..5a263086f2
--- /dev/null
+++ b/pkg/controllers/tagging/metrics.go
@@ -0,0 +1,56 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tagging
+
+import (
+	"k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/legacyregistry"
+	"sync"
+)
+
+var register sync.Once
+
+var (
+	workItemDuration = metrics.NewHistogramVec(
+		&metrics.HistogramOpts{
+			Name:           "cloudprovider_aws_tagging_controller_work_item_duration_seconds",
+			Help:           "latency of a work item, covering time spent in the queue and time taken to process it",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{"latency_type"})
+
+	workItemError = metrics.NewCounterVec(
+		&metrics.CounterOpts{
+			Name:           "cloudprovider_aws_tagging_controller_work_item_errors_total",
+			Help:           "number of errors encountered while dequeuing and processing work items",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{"error_type", "instance_id"})
+)
+
+// registerMetrics registers tagging-controller metrics.
+func registerMetrics() {
+	register.Do(func() {
+		legacyregistry.MustRegister(workItemDuration)
+		legacyregistry.MustRegister(workItemError)
+	})
+}
+
+func recordWorkItemLatencyMetrics(latencyType string, timeTaken float64) {
+	workItemDuration.With(metrics.Labels{"latency_type": latencyType}).Observe(timeTaken)
+}
+
+func recordWorkItemErrorMetrics(errorType string, instanceID string) {
+	workItemError.With(metrics.Labels{"error_type": errorType, "instance_id": instanceID}).Inc()
+}
diff --git a/pkg/controllers/tagging/tagging_controller.go b/pkg/controllers/tagging/tagging_controller.go
index fc84f2b5ae..9e56cb6337 100644
--- a/pkg/controllers/tagging/tagging_controller.go
+++ b/pkg/controllers/tagging/tagging_controller.go
@@ -31,12 +31,30 @@ import (
 
 // workItem contains the node and an action for that node
 type workItem struct {
-	node   *v1.Node
-	action func(node *v1.Node) error
+	node           *v1.Node
+	action         func(node *v1.Node) error
+	requeuingCount int
+	enqueueTime    time.Time
 }
 
+const (
+	maxRequeuingCount = 9
+
+	// The label for the total number of errors a work item encounters while eventually succeeding
+	totalErrorsWorkItemErrorMetric = "total_errors"
+
+	// The label for the total time from when a work item is enqueued to when it is fully processed
+	workItemProcessingTimeWorkItemMetric = "work_item_processing_time"
+
+	// The label for the total time from when a work item is enqueued to when it is dequeued
+	workItemDequeuingTimeWorkItemMetric = "work_item_dequeuing_time"
+
+	// The label for the number of errors a work item encounters once its retries are exhausted
+	errorsAfterRetriesExhaustedWorkItemErrorMetric = "errors_after_retries_exhausted"
+)
+
 // Controller is the controller implementation for tagging cluster resources.
-// It periodically check for Node events (creating/deleting) to apply appropriate
+// It periodically checks for Node events (creating/deleting) to apply/delete appropriate
 // tags to resources.
 type Controller struct {
 	nodeInformer coreinformers.NodeInformer
@@ -44,6 +62,7 @@ type Controller struct {
 	cloud        *awsv1.Cloud
 	workqueue    workqueue.RateLimitingInterface
 	nodesSynced  cache.InformerSynced
+
 	// Value controlling Controller monitoring period, i.e. how often does Controller
 	// check node list. This value should be lower than nodeMonitorGracePeriod
 	// set in controller-manager
 	nodeMonitorPeriod time.Duration
@@ -71,15 +90,16 @@ func NewTaggingController(
 		return nil, err
 	}
 
+	registerMetrics()
 	tc := &Controller{
 		nodeInformer:      nodeInformer,
 		kubeClient:        kubeClient,
 		cloud:             awsCloud,
-		nodeMonitorPeriod: nodeMonitorPeriod,
 		tags:              tags,
 		resources:         resources,
 		workqueue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "Tagging"),
 		nodesSynced:       nodeInformer.Informer().HasSynced,
+		nodeMonitorPeriod: nodeMonitorPeriod,
 	}
 
 	// Use shared informer to listen to add/update/delete of nodes.
Note that any nodes @@ -135,19 +155,42 @@ func (tc *Controller) process() bool { workItem, ok := obj.(*workItem) if !ok { tc.workqueue.Forget(obj) - utilruntime.HandleError(fmt.Errorf("expected workItem in workqueue but got %#v", obj)) + err := fmt.Errorf("expected workItem in workqueue but got %#v", obj) + utilruntime.HandleError(err) + return nil + } + + timeTaken := time.Since(workItem.enqueueTime).Seconds() + recordWorkItemLatencyMetrics(workItemDequeuingTimeWorkItemMetric, timeTaken) + + instanceID, err := awsv1.KubernetesInstanceID(workItem.node.Spec.ProviderID).MapToAWSInstanceID() + if err != nil { + err = fmt.Errorf("Error in getting instanceID for node %s, error: %v", workItem.node.GetName(), err) + utilruntime.HandleError(err) return nil } - err := workItem.action(workItem.node) + err = workItem.action(workItem.node) + if err != nil { - // Put the item back on the workqueue to handle any transient errors. - tc.workqueue.AddRateLimited(workItem) - return fmt.Errorf("error finishing work item '%v': %s, requeuing", workItem, err.Error()) + if workItem.requeuingCount < maxRequeuingCount { + // Put the item back on the workqueue to handle any transient errors. + workItem.requeuingCount++ + tc.workqueue.AddRateLimited(workItem) + + recordWorkItemErrorMetrics(totalErrorsWorkItemErrorMetric, string(instanceID)) + return fmt.Errorf("error processing work item '%v': %s, requeuing count %d", workItem, err.Error(), workItem.requeuingCount) + } + + klog.Errorf("error processing work item '%v': %s, requeuing count exceeded", workItem, err.Error()) + recordWorkItemErrorMetrics(errorsAfterRetriesExhaustedWorkItemErrorMetric, string(instanceID)) + } else { + klog.Infof("Finished processing %v", workItem) + timeTaken = time.Since(workItem.enqueueTime).Seconds() + recordWorkItemLatencyMetrics(workItemProcessingTimeWorkItemMetric, timeTaken) } tc.workqueue.Forget(obj) - klog.Infof("Finished processing %v", workItem) return nil }(obj) @@ -159,7 +202,7 @@ func (tc *Controller) process() bool { return true } -// tagNodesResources tag node resources from a list of nodes +// tagNodesResources tag node resources // If we want to tag more resources, modify this function appropriately func (tc *Controller) tagNodesResources(node *v1.Node) error { for _, resource := range tc.resources { @@ -178,14 +221,9 @@ func (tc *Controller) tagNodesResources(node *v1.Node) error { // tagEc2Instances applies the provided tags to each EC2 instance in // the cluster. 
 func (tc *Controller) tagEc2Instance(node *v1.Node) error {
-	instanceID, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID()
+	instanceID, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() // provider ID already validated in process()
 
-	if err != nil {
-		klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err)
-		return err
-	}
-
-	err = tc.cloud.TagResource(string(instanceID), tc.tags)
+	err := tc.cloud.TagResource(string(instanceID), tc.tags)
 
 	if err != nil {
 		klog.Errorf("Error in tagging EC2 instance for node %s, error: %v", node.GetName(), err)
@@ -197,7 +235,7 @@ func (tc *Controller) tagEc2Instance(node *v1.Node) error {
 	return nil
 }
 
-// untagNodeResources untag node resources from a list of nodes
+// untagNodeResources untags node resources
 // If we want to untag more resources, modify this function appropriately
 func (tc *Controller) untagNodeResources(node *v1.Node) error {
 	for _, resource := range tc.resources {
@@ -216,14 +254,9 @@ func (tc *Controller) untagNodeResources(node *v1.Node) error {
 
 // untagEc2Instances deletes the provided tags to each EC2 instances in
 // the cluster.
 func (tc *Controller) untagEc2Instance(node *v1.Node) error {
-	instanceID, err := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID()
-
-	if err != nil {
-		klog.Errorf("Error in getting instanceID for node %s, error: %v", node.GetName(), err)
-		return err
-	}
+	instanceID, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() // provider ID already validated in process()
 
-	err = tc.cloud.UntagResource(string(instanceID), tc.tags)
+	err := tc.cloud.UntagResource(string(instanceID), tc.tags)
 
 	if err != nil {
 		klog.Errorf("Error in untagging EC2 instance for node %s, error: %v", node.GetName(), err)
@@ -240,8 +273,10 @@ func (tc *Controller) untagEc2Instance(node *v1.Node) error {
 func (tc *Controller) enqueueNode(obj interface{}, action func(node *v1.Node) error) {
 	node := obj.(*v1.Node)
 	item := &workItem{
-		node:   node,
-		action: action,
+		node:           node,
+		action:         action,
+		requeuingCount: 0,
+		enqueueTime:    time.Now(),
 	}
 	tc.workqueue.Add(item)
 	klog.Infof("Added %s to the workqueue", item)
diff --git a/pkg/controllers/tagging/tagging_controller_test.go b/pkg/controllers/tagging/tagging_controller_test.go
index 8f35810b30..606aabdb60 100644
--- a/pkg/controllers/tagging/tagging_controller_test.go
+++ b/pkg/controllers/tagging/tagging_controller_test.go
@@ -135,15 +135,26 @@ func Test_NodesJoiningAndLeaving(t *testing.T) {
 			} else {
 				tc.enqueueNode(testcase.currNode, tc.untagNodeResources)
 			}
-			tc.process()
+
+			for tc.workqueue.Len() > 0 {
+				tc.process()
+
+				// Sleep briefly: rate-limited requeuing uses exponential backoff,
+				// so the workqueue can look empty right after a work item fails.
+				time.Sleep(1500 * time.Millisecond)
+			}
 
 			for _, msg := range testcase.expectedMessages {
 				if !strings.Contains(logBuf.String(), msg) {
 					t.Errorf("\nMsg %q not found in log: \n%v\n", msg, logBuf.String())
 				}
-				if strings.Contains(logBuf.String(), "error tagging ") || strings.Contains(logBuf.String(), "error untagging ") {
-					if !strings.Contains(logBuf.String(), ", requeuing") {
-						t.Errorf("\nFailed to tag or untag but logs do not contain 'requeueing': \n%v\n", logBuf.String())
+				if strings.Contains(logBuf.String(), "Unable to tag") || strings.Contains(logBuf.String(), "Unable to untag") {
+					if !strings.Contains(logBuf.String(), ", requeuing count ") {
+						t.Errorf("\nFailed to tag or untag but logs do not mention requeuing: \n%v\n", logBuf.String())
+					}
+
+					if !strings.Contains(logBuf.String(), "requeuing count exceeded") {
+						t.Errorf("\nExceeded requeue count but did not stop: \n%v\n", logBuf.String())
+					}
 				}
 			}
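
The hunks above register and record workItemDuration and workItemError, but the collector declarations themselves sit outside the quoted context. Below is a minimal sketch of how they could be declared with k8s.io/component-base/metrics; the metric names, help strings, and bucket values are illustrative assumptions, not values taken from this patch:

package tagging

import (
	"sync"

	"k8s.io/component-base/metrics"
)

var (
	// register is the sync.Once consulted by registerMetrics above, so the
	// collectors are registered with the legacy registry exactly once.
	register sync.Once

	// workItemDuration observes enqueue-to-dequeue and enqueue-to-processed
	// latencies, partitioned by the latency_type label.
	workItemDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Name:    "tagging_controller_work_item_duration_seconds", // assumed name
			Help:    "Latency of work items, from enqueue to dequeue or to completion.",
			Buckets: metrics.ExponentialBuckets(0.5, 1.5, 20), // assumed buckets
		},
		[]string{"latency_type"})

	// workItemError counts processing errors, partitioned by the error_type
	// and instance_id labels recorded in recordWorkItemErrorMetrics above.
	workItemError = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Name: "tagging_controller_work_item_errors_total", // assumed name
			Help: "Number of errors while processing work items.",
		},
		[]string{"error_type", "instance_id"})
)

One consideration with this scheme: instance_id is a per-node label, so the error counter's cardinality grows with cluster size.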
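
process() now caps retries at maxRequeuingCount, so a work item that keeps failing cannot occupy the queue forever. The same bounded-retry pattern, reduced to a self-contained sketch; the item type, maxRetries constant, and processOne helper are hypothetical names, not part of this patch:

package demo

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

// item carries its own retry budget, mirroring the requeuingCount
// field this patch adds to workItem.
type item struct {
	key     string
	retries int
}

const maxRetries = 9 // same budget as maxRequeuingCount above

// processOne runs action once, then either requeues the item with
// backoff or gives up, as the reworked process() does.
func processOne(queue workqueue.RateLimitingInterface, it *item, action func(*item) error) error {
	defer queue.Done(it)

	if err := action(it); err != nil {
		if it.retries < maxRetries {
			it.retries++
			queue.AddRateLimited(it) // exponential backoff between attempts
			return fmt.Errorf("item %q failed: %v, requeuing (attempt %d)", it.key, err, it.retries)
		}
		queue.Forget(it) // retry budget exhausted; give up on the item
		return fmt.Errorf("item %q failed after %d attempts: %v", it.key, maxRetries, err)
	}

	queue.Forget(it) // success; clear the item's rate-limiter history
	return nil
}

Forget only resets the rate limiter's failure history for the item; the deferred Done is still needed to mark the in-flight item finished so the queue will accept it again.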
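
process() also resolves the node's AWS instance ID before running the action, both to label the error metrics per instance and to drop items whose provider ID cannot be parsed at all, since that failure is not transient. A hypothetical helper showing the shape of the mapping; the provider ID value is an example, not taken from the patch:

package demo

import (
	"fmt"

	awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1"
)

// instanceIDForProviderID mirrors the lookup process() performs up front.
// Provider IDs look like "aws:///us-west-2a/i-0123456789abcdef0": the
// availability zone followed by the bare EC2 instance ID.
func instanceIDForProviderID(providerID string) (string, error) {
	instanceID, err := awsv1.KubernetesInstanceID(providerID).MapToAWSInstanceID()
	if err != nil {
		// Malformed provider IDs are permanent errors, so process()
		// drops such work items instead of requeuing them.
		return "", fmt.Errorf("cannot map provider ID %q: %v", providerID, err)
	}
	return string(instanceID), nil
}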
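
Finally, the 1500ms sleep in the test's drain loop lines up with the queue's backoff schedule. NewTaggingController builds the Tagging queue with workqueue.DefaultControllerRateLimiter(), whose per-item limiter starts at a 5ms delay and doubles on every failure; assuming the test controller is built the same way, the worst single wait for this patch's nine requeues works out as follows:

package demo

import (
	"fmt"
	"time"
)

// printBackoffSchedule prints the per-item backoff of
// workqueue.DefaultControllerRateLimiter(): the nth requeue of a
// failing item waits 5ms * 2^(n-1), capped far above what nine
// retries reach. For n = 9 the longest single wait is
// 5ms * 256 = 1.28s, which is why a 1.5s pause per drain
// iteration is enough for a requeued item to reappear.
func printBackoffSchedule() {
	for n := 1; n <= 9; n++ {
		delay := 5 * time.Millisecond << uint(n-1)
		fmt.Printf("requeue %d backs off %v\n", n, delay)
	}
}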