Skip to content

Commit

Permalink
Merge pull request #296 from linode/more-robust-healthcheck
Browse files Browse the repository at this point in the history
[feat] add linode token health check
  • Loading branch information
ficap-akamai authored Jan 29, 2025
2 parents 1f644a0 + 8b1e8df commit c044e45
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 18 deletions.
21 changes: 21 additions & 0 deletions cloud/linode/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package client

import (
"context"
"errors"
"fmt"
"net/http"
"os"
Expand Down Expand Up @@ -52,6 +53,8 @@ type Client interface {
DeleteFirewall(ctx context.Context, fwid int) error
GetFirewall(context.Context, int) (*linodego.Firewall, error)
UpdateFirewallRules(context.Context, int, linodego.FirewallRuleSet) (*linodego.FirewallRuleSet, error)

GetProfile(ctx context.Context) (*linodego.Profile, error)
}

// linodego.Client implements Client
Expand All @@ -73,3 +76,21 @@ func New(token string, timeout time.Duration) (*linodego.Client, error) {
klog.V(3).Infof("Linode client created with default timeout of %v", timeout)
return client, nil
}

// CheckClientAuthenticated probes the Linode API with a GetProfile call to
// find out whether the client's credentials are accepted.
//
// It returns:
//   - (true, nil) when GetProfile succeeds;
//   - (false, nil) when the API answers with HTTP 401 Unauthorized, i.e. the
//     credentials were definitively rejected;
//   - (false, err) for every other failure (non-API errors as well as API
//     errors with a different status), because the authentication state
//     cannot be determined from them.
func CheckClientAuthenticated(ctx context.Context, client Client) (bool, error) {
	_, err := client.GetProfile(ctx)

	var apiErr *linodego.Error
	switch {
	case err == nil:
		return true, nil
	case errors.As(err, &apiErr) && linodego.ErrHasStatus(err, http.StatusUnauthorized):
		// Only an explicit 401 from the API counts as "not authenticated".
		return false, nil
	default:
		return false, err
	}
}
15 changes: 15 additions & 0 deletions cloud/linode/client/mocks/mock_client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 44 additions & 16 deletions cloud/linode/cloud.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package linode

import (
"context"
"fmt"
"io"
"net"
Expand All @@ -19,11 +20,12 @@ import (

const (
// The name of this cloudprovider
ProviderName = "linode"
accessTokenEnv = "LINODE_API_TOKEN"
regionEnv = "LINODE_REGION"
ciliumLBType = "cilium-bgp"
nodeBalancerLBType = "nodebalancer"
ProviderName = "linode"
accessTokenEnv = "LINODE_API_TOKEN"
regionEnv = "LINODE_REGION"
ciliumLBType = "cilium-bgp"
nodeBalancerLBType = "nodebalancer"
tokenHealthCheckPeriod = 5 * time.Minute
)

var supportedLoadBalancerTypes = []string{ciliumLBType, nodeBalancerLBType}
Expand All @@ -32,9 +34,10 @@ var supportedLoadBalancerTypes = []string{ciliumLBType, nodeBalancerLBType}
// We expect it to be initialized with flags external to this package, likely in
// main.go
var Options struct {
KubeconfigFlag *pflag.Flag
LinodeGoDebug bool
EnableRouteController bool
KubeconfigFlag *pflag.Flag
LinodeGoDebug bool
EnableRouteController bool
EnableTokenHealthChecker bool
// Deprecated: use VPCNames instead
VPCName string
VPCNames string
Expand All @@ -43,13 +46,15 @@ var Options struct {
IpHolderSuffix string
LinodeExternalNetwork *net.IPNet
NodeBalancerTags []string
GlobalStopChannel chan<- struct{}
}

type linodeCloud struct {
client client.Client
instances cloudprovider.InstancesV2
loadbalancers cloudprovider.LoadBalancer
routes cloudprovider.Routes
client client.Client
instances cloudprovider.InstancesV2
loadbalancers cloudprovider.LoadBalancer
routes cloudprovider.Routes
linodeTokenHealthChecker *healthChecker
}

var instanceCache *instances
Expand Down Expand Up @@ -91,6 +96,24 @@ func newCloud() (cloudprovider.Interface, error) {
linodeClient.SetDebug(true)
}

var healthChecker *healthChecker

if Options.EnableTokenHealthChecker {
authenticated, err := client.CheckClientAuthenticated(context.TODO(), linodeClient)
if err != nil {
return nil, fmt.Errorf("linode client authenticated connection error: %w", err)
}

if !authenticated {
return nil, fmt.Errorf("linode api token %q is invalid", accessTokenEnv)
}

healthChecker, err = newHealthChecker(apiToken, timeout, tokenHealthCheckPeriod, Options.GlobalStopChannel)
if err != nil {
return nil, fmt.Errorf("unable to initialize healthchecker: %w", err)
}
}

if Options.VPCName != "" && Options.VPCNames != "" {
return nil, fmt.Errorf("cannot have both vpc-name and vpc-names set")
}
Expand Down Expand Up @@ -126,10 +149,11 @@ func newCloud() (cloudprovider.Interface, error) {

// create struct that satisfies cloudprovider.Interface
lcloud := &linodeCloud{
client: linodeClient,
instances: instanceCache,
loadbalancers: newLoadbalancers(linodeClient, region),
routes: routes,
client: linodeClient,
instances: instanceCache,
loadbalancers: newLoadbalancers(linodeClient, region),
routes: routes,
linodeTokenHealthChecker: healthChecker,
}
return lcloud, nil
}
Expand All @@ -140,6 +164,10 @@ func (c *linodeCloud) Initialize(clientBuilder cloudprovider.ControllerClientBui
serviceInformer := sharedInformer.Core().V1().Services()
nodeInformer := sharedInformer.Core().V1().Nodes()

if c.linodeTokenHealthChecker != nil {
go c.linodeTokenHealthChecker.Run(stopCh)
}

serviceController := newServiceController(c.loadbalancers.(*loadbalancers), serviceInformer)
go serviceController.Run(stopCh)

Expand Down
63 changes: 63 additions & 0 deletions cloud/linode/health_check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package linode

import (
"context"
"time"

"github.com/linode/linode-cloud-controller-manager/cloud/linode/client"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
)

// healthChecker periodically verifies that the Linode API token is still
// accepted and closes a stop channel once the token is rejected.
type healthChecker struct {
	// linodeClient is the client used to perform the authentication probe.
	linodeClient client.Client
	// period is the interval between two consecutive probes.
	period time.Duration
	// stopCh is closed when an invalid token is detected. It is set to nil
	// afterwards so that it is never closed twice.
	stopCh chan<- struct{}
}

// newHealthChecker builds a healthChecker backed by its own Linode client.
//
// apiToken and timeout are forwarded to client.New, period is the interval
// between token checks, and stopCh is the channel that gets closed when the
// token is found to be invalid. An error is returned only when the client
// cannot be created.
func newHealthChecker(apiToken string, timeout time.Duration, period time.Duration, stopCh chan<- struct{}) (*healthChecker, error) {
	linodeClient, err := client.New(apiToken, timeout)
	if err != nil {
		return nil, err
	}

	hc := &healthChecker{
		linodeClient: linodeClient,
		period:       period,
		stopCh:       stopCh,
	}
	return hc, nil
}

// Run executes the token check every r.period until stopCh is closed.
// It blocks, so callers are expected to start it in its own goroutine.
func (r *healthChecker) Run(stopCh <-chan struct{}) {
	// Derive a context from stopCh so the API call made by each check is
	// bound to the same stop signal as the loop itself.
	ctx := wait.ContextForChannel(stopCh)
	check := func() {
		r.do(ctx)
	}
	wait.Until(check, r.period, stopCh)
}

// worker adapts do to the parameterless func() signature expected by
// wait.Until, binding the supplied context into the returned closure.
func (r *healthChecker) worker(ctx context.Context) func() {
	check := func() {
		r.do(ctx)
	}
	return check
}

// do performs a single token health check.
//
// If the stop channel is nil the check is skipped. Otherwise the Linode API
// is probed: an indeterminate result (error) is only logged, a healthy token
// is logged at info level, and a rejected token closes the stop channel and
// clears it so that it cannot be closed a second time.
func (r *healthChecker) do(ctx context.Context) {
	if r.stopCh == nil {
		klog.Errorf("stop signal already fired. nothing to do")
		return
	}

	authenticated, err := client.CheckClientAuthenticated(ctx, r.linodeClient)
	switch {
	case err != nil:
		// Not a definitive rejection: keep running and retry on the next tick.
		klog.Warningf("unable to determine linode client authentication status: %s", err.Error())
	case authenticated:
		klog.Info("linode api token is healthy")
	default:
		klog.Error("detected invalid linode api token: stopping controllers")

		close(r.stopCh)
		// Closing a channel twice panics; nil marks the signal as already sent.
		r.stopCh = nil
	}
}
153 changes: 153 additions & 0 deletions cloud/linode/health_check_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package linode

import (
"testing"
"time"

"github.com/golang/mock/gomock"
"github.com/linode/linode-cloud-controller-manager/cloud/linode/client/mocks"
"github.com/linode/linodego"
)

// TestHealthCheck runs every health-check scenario as a subtest, handing each
// one a fresh gomock controller and mocked Linode client.
func TestHealthCheck(t *testing.T) {
	type scenario struct {
		name string
		f    func(*testing.T, *mocks.MockClient)
	}

	scenarios := []scenario{
		{
			name: "Test succeeding calls to linode api stop signal is not fired",
			f:    testSucceedingCallsToLinodeAPIHappenStopSignalNotFired,
		},
		{
			name: "Test Unauthorized calls to linode api stop signal is fired",
			f:    testFailingCallsToLinodeAPIHappenStopSignalFired,
		},
		{
			name: "Test failing calls to linode api stop signal is not fired",
			f:    testErrorCallsToLinodeAPIHappenStopSignalNotFired,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			// Finish verifies that all expected mock calls were made.
			defer ctrl.Finish()

			sc.f(t, mocks.NewMockClient(ctrl))
		})
	}
}

// testSucceedingCallsToLinodeAPIHappenStopSignalNotFired checks that the
// health checker leaves its stop channel open while GetProfile keeps
// succeeding. Exactly two GetProfile calls are expected during the test.
func testSucceedingCallsToLinodeAPIHappenStopSignalNotFired(t *testing.T, client *mocks.MockClient) {
	const (
		checkPeriod = 1 * time.Second
		observeFor  = 1500 * time.Millisecond
	)

	stopSignal := make(chan struct{}) // handed to the checker; closed by it on an invalid token
	runStop := make(chan struct{})    // closed by this test to terminate hc.Run

	// stopFired is a non-blocking probe of the checker's stop channel.
	stopFired := func() bool {
		select {
		case <-stopSignal:
			return true
		default:
			return false
		}
	}

	client.EXPECT().GetProfile(gomock.Any()).Times(2).Return(&linodego.Profile{}, nil)

	hc, err := newHealthChecker("validToken", checkPeriod, checkPeriod, stopSignal)
	if err != nil {
		t.Fatalf("expected a nil error, got %v", err)
	}
	// Swap the real client for the mock.
	hc.linodeClient = client

	defer close(runStop)
	go hc.Run(runStop)

	// wait for two checks to happen
	time.Sleep(observeFor)

	if stopFired() {
		t.Error("healthChecker sent stop signal")
	}
}

// testFailingCallsToLinodeAPIHappenStopSignalFired checks that the health
// checker closes its stop channel once GetProfile reports a 401 error, and
// not before: the first (successful) check must leave the channel open.
func testFailingCallsToLinodeAPIHappenStopSignalFired(t *testing.T, client *mocks.MockClient) {
	stopSignal := make(chan struct{}) // handed to the checker; closed by it on an invalid token
	runStop := make(chan struct{})    // closed by this test to terminate hc.Run

	// stopFired is a non-blocking probe of the checker's stop channel.
	stopFired := func() bool {
		select {
		case <-stopSignal:
			return true
		default:
			return false
		}
	}

	client.EXPECT().GetProfile(gomock.Any()).Times(1).Return(&linodego.Profile{}, nil)

	hc, err := newHealthChecker("validToken", 1*time.Second, 1*time.Second, stopSignal)
	if err != nil {
		t.Fatalf("expected a nil error, got %v", err)
	}
	// Swap the real client for the mock.
	hc.linodeClient = client

	defer close(runStop)
	go hc.Run(runStop)

	// wait for check to happen
	time.Sleep(500 * time.Millisecond)

	if stopFired() {
		t.Error("healthChecker sent stop signal")
	}

	// invalidate token: the next GetProfile call is answered with a 401
	unauthorized := &linodego.Error{Code: 401, Message: "Invalid Token"}
	client.EXPECT().GetProfile(gomock.Any()).Times(1).Return(&linodego.Profile{}, unauthorized)

	// wait for check to happen
	time.Sleep(1 * time.Second)

	if !stopFired() {
		t.Error("healthChecker did not send stop signal")
	}
}

// testErrorCallsToLinodeAPIHappenStopSignalNotFired checks that an API error
// other than 401 (here a 500) does not make the health checker close its stop
// channel, and that the channel also stays open once calls succeed again.
func testErrorCallsToLinodeAPIHappenStopSignalNotFired(t *testing.T, client *mocks.MockClient) {
	stopSignal := make(chan struct{}) // handed to the checker; closed by it on an invalid token
	runStop := make(chan struct{})    // closed by this test to terminate hc.Run

	// stopFired is a non-blocking probe of the checker's stop channel.
	stopFired := func() bool {
		select {
		case <-stopSignal:
			return true
		default:
			return false
		}
	}

	client.EXPECT().GetProfile(gomock.Any()).Times(1).Return(&linodego.Profile{}, nil)

	hc, err := newHealthChecker("validToken", 1*time.Second, 1*time.Second, stopSignal)
	if err != nil {
		t.Fatalf("expected a nil error, got %v", err)
	}
	// Swap the real client for the mock.
	hc.linodeClient = client

	defer close(runStop)
	go hc.Run(runStop)

	// wait for check to happen
	time.Sleep(500 * time.Millisecond)

	if stopFired() {
		t.Error("healthChecker sent stop signal")
	}

	// simulate server error
	serverErr := &linodego.Error{Code: 500}
	client.EXPECT().GetProfile(gomock.Any()).Times(1).Return(&linodego.Profile{}, serverErr)

	// wait for check to happen
	time.Sleep(1 * time.Second)

	if stopFired() {
		t.Error("healthChecker sent stop signal")
	}

	// the API recovers: the following call succeeds again
	client.EXPECT().GetProfile(gomock.Any()).Times(1).Return(&linodego.Profile{}, nil)

	// wait for check to happen
	time.Sleep(1 * time.Second)

	if stopFired() {
		t.Error("healthChecker sent stop signal")
	}
}
Loading

0 comments on commit c044e45

Please sign in to comment.