Skip to content

Commit

Permalink
Run cluster status monitor on unsharded controller only
Browse files (browse the repository at this point in the history)
Running one cluster status monitor per Fleet controller pod is not
necessary and may cause conflicts in sharded setups.
  • Loading branch information
weyfonk committed Oct 22, 2024
1 parent c4d94bd commit 60d35cf
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions internal/cmd/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,18 @@ func start(
return err
}

setupLog.Info("starting cluster status monitor")
cfg := fleetcfg.Get()
// No need to run a similar check on the threshold, since its minimum value will be a multiple of the agent check-in
// interval anyway.
if cfg.ClusterMonitorInterval.Seconds() == 0 {
err := errors.New("cluster status monitor interval cannot be 0")
setupLog.Error(err, "cannot start cluster status monitor")
return err
if shardID == "" { // only one instance of the cluster status monitor needs to run.
setupLog.Info("starting cluster status monitor")
cfg := fleetcfg.Get()
// No need to run a similar check on the threshold, since its minimum value will be a multiple of the agent check-in
// interval anyway.
if cfg.ClusterMonitorInterval.Seconds() == 0 {
err := errors.New("cluster status monitor interval cannot be 0")
setupLog.Error(err, "cannot start cluster status monitor")
return err
}
go clustermonitor.Run(ctx, mgr.GetClient(), cfg.ClusterMonitorInterval.Duration, cfg.ClusterMonitorThreshold.Duration)
}
go clustermonitor.Run(ctx, mgr.GetClient(), cfg.ClusterMonitorInterval.Duration, cfg.ClusterMonitorThreshold.Duration)

setupLog.Info("starting job scheduler")
jobCtx, cancel := context.WithCancel(ctx)
Expand Down

0 comments on commit 60d35cf

Please sign in to comment.