-
Notifications
You must be signed in to change notification settings - Fork 122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Add Context
to Run
method of the manager
#757
Changes from all commits
a56a060
d3addec
ec930ae
c48450c
d456e17
1e723d4
699074e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ import ( | |
"os" | ||
goruntime "runtime" | ||
"strconv" | ||
"sync" | ||
"time" | ||
|
||
machinescheme "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/scheme" | ||
|
@@ -73,7 +74,7 @@ var ( | |
) | ||
|
||
// Run runs the MCMServer. This should never exit. | ||
func Run(s *options.MCMServer) error { | ||
func Run(ctx context.Context, s *options.MCMServer) error { | ||
// To help debugging, immediately log version | ||
klog.V(4).Infof("Version: %+v", version.Get()) | ||
if err := s.Validate(); err != nil { | ||
|
@@ -126,8 +127,9 @@ func Run(s *options.MCMServer) error { | |
|
||
recorder := createRecorder(kubeClientControl) | ||
|
||
run := func(ctx context.Context) { | ||
var stop <-chan struct{} | ||
waitGroup := sync.WaitGroup{} | ||
waitGroup.Add(1) | ||
startControllers := func(ctx context.Context) { | ||
// Control plane client used to interact with machine APIs | ||
controlMachineClientBuilder := machinecontroller.SimpleClientBuilder{ | ||
ClientConfig: controlkubeconfig, | ||
|
@@ -141,25 +143,23 @@ func Run(s *options.MCMServer) error { | |
ClientConfig: targetkubeconfig, | ||
} | ||
|
||
err := StartControllers( | ||
if err := StartControllers( | ||
ctx, | ||
s, | ||
controlkubeconfig, | ||
targetkubeconfig, | ||
controlMachineClientBuilder, | ||
controlCoreClientBuilder, | ||
targetCoreClientBuilder, | ||
recorder, | ||
stop, | ||
) | ||
|
||
klog.Fatalf("error running controllers: %v", err) | ||
panic("unreachable") | ||
|
||
); err != nil { | ||
klog.Fatalf("failed to start controllers: %v", err) | ||
} | ||
waitGroup.Done() | ||
} | ||
|
||
if !s.LeaderElection.LeaderElect { | ||
run(nil) | ||
panic("unreachable") | ||
startControllers(ctx) | ||
} | ||
|
||
id, err := os.Hostname() | ||
|
@@ -182,32 +182,34 @@ func Run(s *options.MCMServer) error { | |
klog.Fatalf("error creating lock: %v", err) | ||
} | ||
|
||
ctx := context.TODO() | ||
leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ | ||
Lock: rl, | ||
LeaseDuration: s.LeaderElection.LeaseDuration.Duration, | ||
RenewDeadline: s.LeaderElection.RenewDeadline.Duration, | ||
RetryPeriod: s.LeaderElection.RetryPeriod.Duration, | ||
Callbacks: leaderelection.LeaderCallbacks{ | ||
OnStartedLeading: run, | ||
OnStartedLeading: startControllers, | ||
OnStoppedLeading: func() { | ||
klog.Fatalf("leaderelection lost") | ||
klog.Errorf("leaderelection lost") | ||
waitGroup.Wait() | ||
Comment on lines
+193
to
+194
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't we exit immediately after losing leader election? Why wait? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I just wanted to ensure that all the worker processes finish here. Otherwise you will see something like this:
The controller stops in the middle of the shutdown. In contrast to that, the
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, but this is for the case where an interrupt is sent to the leader process, which leads to shutdown of workers. But in the case when the leader fails to renew the lease (due to API latency, for example), then there won't be a shutdown of the workers, right? So we'll keep on waiting? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another case which I noticed is when it's attempting to acquire the leader lease and we send a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One solution could be:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Leadership can be lost due to various reasons. Would it not be better if all the controllers that are started when acquiring leadership are subsequently stopped when losing leadership? There is no need for exiting. I might have missed something, so please feel free to point that out. |
||
}, | ||
}, | ||
}) | ||
panic("unreachable") | ||
|
||
return nil | ||
} | ||
|
||
// StartControllers starts all the controllers which are a part of machine-controller-manager | ||
func StartControllers(s *options.MCMServer, | ||
func StartControllers( | ||
ctx context.Context, | ||
s *options.MCMServer, | ||
controlCoreKubeconfig *rest.Config, | ||
targetCoreKubeconfig *rest.Config, | ||
controlMachineClientBuilder machinecontroller.ClientBuilder, | ||
controlCoreClientBuilder corecontroller.ClientBuilder, | ||
targetCoreClientBuilder corecontroller.ClientBuilder, | ||
recorder record.EventRecorder, | ||
stop <-chan struct{}) error { | ||
|
||
) error { | ||
klog.V(5).Info("Getting available resources") | ||
availableResources, err := getAvailableResources(controlCoreClientBuilder) | ||
if err != nil { | ||
|
@@ -231,18 +233,16 @@ func StartControllers(s *options.MCMServer, | |
if availableResources[machineGVR] || availableResources[machineSetGVR] || availableResources[machineDeploymentGVR] { | ||
klog.V(5).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod) | ||
|
||
controlMachineInformerFactory := machineinformers.NewFilteredSharedInformerFactory( | ||
controlMachineInformerFactory := machineinformers.NewSharedInformerFactoryWithOptions( | ||
controlMachineClientBuilder.ClientOrDie("control-machine-shared-informers"), | ||
s.MinResyncPeriod.Duration, | ||
s.Namespace, | ||
nil, | ||
machineinformers.WithNamespace(s.Namespace), | ||
) | ||
|
||
controlCoreInformerFactory := coreinformers.NewFilteredSharedInformerFactory( | ||
controlCoreInformerFactory := coreinformers.NewSharedInformerFactoryWithOptions( | ||
controlCoreClientBuilder.ClientOrDie("control-core-shared-informers"), | ||
s.MinResyncPeriod.Duration, | ||
s.Namespace, | ||
nil, | ||
coreinformers.WithNamespace(s.Namespace), | ||
) | ||
|
||
targetCoreInformerFactory := coreinformers.NewSharedInformerFactory( | ||
|
@@ -284,22 +284,28 @@ func StartControllers(s *options.MCMServer, | |
} | ||
klog.V(1).Info("Starting shared informers") | ||
|
||
controlMachineInformerFactory.Start(stop) | ||
controlCoreInformerFactory.Start(stop) | ||
targetCoreInformerFactory.Start(stop) | ||
controlMachineInformerFactory.Start(ctx.Done()) | ||
controlCoreInformerFactory.Start(ctx.Done()) | ||
targetCoreInformerFactory.Start(ctx.Done()) | ||
|
||
klog.V(5).Info("Running controller") | ||
go mcmcontroller.Run(int(s.ConcurrentNodeSyncs), stop) | ||
|
||
var waitGroup sync.WaitGroup | ||
waitGroup.Add(1) | ||
go func() { | ||
mcmcontroller.Run(ctx, int(s.ConcurrentNodeSyncs)) | ||
waitGroup.Done() | ||
}() | ||
waitGroup.Wait() | ||
} else { | ||
return fmt.Errorf("unable to start machine controller: API GroupVersion %q or %q or %q is not available; \nFound: %#v", machineGVR, machineSetGVR, machineDeploymentGVR, availableResources) | ||
} | ||
|
||
select {} | ||
return nil | ||
} | ||
|
||
// TODO: In general, any controller checking this needs to be dynamic so | ||
// users don't have to restart their controller manager if they change the apiserver. | ||
// TODO: In general, any controller checking this needs to be dynamic so users don't have to | ||
// restart their controller manager if they change the apiserver. | ||
// | ||
// Until we get there, the structure here needs to be exposed for the construction of a proper ControllerContext. | ||
func getAvailableResources(clientBuilder corecontroller.ClientBuilder) (map[schema.GroupVersionResource]bool, error) { | ||
var discoveryClient discovery.DiscoveryInterface | ||
|
@@ -330,16 +336,16 @@ func getAvailableResources(clientBuilder corecontroller.ClientBuilder) (map[sche | |
return nil, fmt.Errorf("failed to get api versions from server: %v: %v", healthzContent, err) | ||
} | ||
|
||
resourceMap, err := discoveryClient.ServerResources() | ||
_, resources, err := discoveryClient.ServerGroupsAndResources() | ||
if err != nil { | ||
utilruntime.HandleError(fmt.Errorf("unable to get all supported resources from server: %v", err)) | ||
} | ||
if len(resourceMap) == 0 { | ||
if len(resources) == 0 { | ||
return nil, fmt.Errorf("unable to get any supported resources from server") | ||
} | ||
|
||
allResources := map[schema.GroupVersionResource]bool{} | ||
for _, apiResourceList := range resourceMap { | ||
for _, apiResourceList := range resources { | ||
version, err := schema.ParseGroupVersion(apiResourceList.GroupVersion) | ||
if err != nil { | ||
return nil, err | ||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On sending Ctrl-C in the case when MCM runs with leader-elect disabled, leader election is attempted again; a `return` here would help.