diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/cpu_system_eviction.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/cpu_system_eviction.go
new file mode 100644
index 0000000000..c545f1c953
--- /dev/null
+++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/cpu_system_eviction.go
@@ -0,0 +1,146 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package eviction
+
+import (
+	"encoding/json"
+	"time"
+
+	cliflag "k8s.io/component-base/cli/flag"
+
+	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
+)
+
+const (
+	defaultEnableCPUSystemEviction              = false
+	defaultSystemLoadUpperBoundRatio            = 2
+	defaultSystemLoadLowerBoundRatio            = 1
+	defaultSystemUsageUpperBoundRatio           = 0.8
+	defaultSystemUsageLowerBoundRatio           = 0.6
+	defaultThresholdMetPercentage               = 0.8
+	defaultMetricRingSize                       = 10
+	defaultEvictionCoolDownTime                 = 300 * time.Second
+	defaultCheckCPUManager                      = false
+	defaultCPUSystemPressureEvictionGracePeriod = -1
+)
+
+var defaultEvictionRankingMetrics = []string{"qos.pod", "native.qos.pod", "priority.pod"}
+
+type CPUSystemPressureEvictionOptions struct {
+	EnableCPUSystemEviction    bool
+	SystemLoadUpperBoundRatio  float64
+	SystemLoadLowerBoundRatio  float64
+	SystemUsageUpperBoundRatio float64
+	SystemUsageLowerBoundRatio float64
+	ThresholdMetPercentage     float64
+	MetricRingSize             int
+	EvictionCoolDownTime       time.Duration
+	EvictionRankingMetrics     []string
+	GracePeriod                int64
+	CheckCPUManager            bool
+	RankingLabels              StringToSlice
+}
+
+func NewCPUSystemPressureEvictionOptions() *CPUSystemPressureEvictionOptions {
+	return &CPUSystemPressureEvictionOptions{
+		EnableCPUSystemEviction:    defaultEnableCPUSystemEviction,
+		SystemLoadUpperBoundRatio:  defaultSystemLoadUpperBoundRatio,
+		SystemLoadLowerBoundRatio:  defaultSystemLoadLowerBoundRatio,
+		SystemUsageUpperBoundRatio: defaultSystemUsageUpperBoundRatio,
+		SystemUsageLowerBoundRatio: defaultSystemUsageLowerBoundRatio,
+		ThresholdMetPercentage:     defaultThresholdMetPercentage,
+		MetricRingSize:             defaultMetricRingSize,
+		EvictionCoolDownTime:       defaultEvictionCoolDownTime,
+		EvictionRankingMetrics:     defaultEvictionRankingMetrics,
+		GracePeriod:                defaultCPUSystemPressureEvictionGracePeriod,
+		CheckCPUManager:            defaultCheckCPUManager,
+		RankingLabels:              map[string][]string{},
+	}
+}
+
+func (o *CPUSystemPressureEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
+	fs := fss.FlagSet("eviction-cpu-system")
+
+	fs.BoolVar(&o.EnableCPUSystemEviction, "eviction-cpu-system-enable", o.EnableCPUSystemEviction,
+		"set true to enable cpu system eviction")
+	fs.Float64Var(&o.SystemLoadUpperBoundRatio, "eviction-cpu-system-load-upper-bound-ratio", o.SystemLoadUpperBoundRatio,
+		"multiply node capacity by this ratio to get the load upper bound. "+
+			"if the load of the node is greater than the load upper bound repeatedly, eviction will be triggered. "+
+			"default 2.0")
+	fs.Float64Var(&o.SystemLoadLowerBoundRatio, "eviction-cpu-system-load-lower-bound-ratio", o.SystemLoadLowerBoundRatio,
+		"multiply node capacity by this ratio to get the load lower bound. "+
+			"if the load of the node is greater than the load lower bound repeatedly, node taint will be triggered. "+
+			"default 1.0")
+	fs.Float64Var(&o.SystemUsageUpperBoundRatio, "eviction-cpu-system-usage-upper-bound-ratio", o.SystemUsageUpperBoundRatio,
+		"multiply node capacity by this ratio to get the usage upper bound. "+
+			"if the cpu usage of the node is greater than the usage upper bound repeatedly, eviction will be triggered. "+
+			"default 0.8")
+	fs.Float64Var(&o.SystemUsageLowerBoundRatio, "eviction-cpu-system-usage-lower-bound-ratio", o.SystemUsageLowerBoundRatio,
+		"multiply node capacity by this ratio to get the usage lower bound. "+
+			"if the cpu usage of the node is greater than the usage lower bound repeatedly, node taint will be triggered. "+
+			"default 0.6")
+	fs.Float64Var(&o.ThresholdMetPercentage, "eviction-cpu-system-threshold-met-percentage", o.ThresholdMetPercentage,
+		"if the ratio of samples in the metric ring whose value exceeds the bound to the metric ring size "+
+			"is greater than this percentage, eviction or node taint will be triggered, default 0.8")
+	fs.IntVar(&o.MetricRingSize, "eviction-cpu-system-metric-ring-size", o.MetricRingSize,
+		"the size of the metric ring, which is used to cache and aggregate the metrics of the node, default 10")
+	fs.DurationVar(&o.EvictionCoolDownTime, "eviction-cpu-system-cool-down-time", o.EvictionCoolDownTime,
+		"the cool-down time of cpu system eviction; if cpu system eviction is triggered, "+
+			"it will be disabled for the cool-down time")
+	fs.StringSliceVar(&o.EvictionRankingMetrics, "eviction-cpu-system-ranking-metrics", o.EvictionRankingMetrics,
+		"metrics used to rank active pods in GetTopEvictionPods")
+	fs.Int64Var(&o.GracePeriod, "eviction-cpu-system-grace-period", o.GracePeriod,
+		"grace period when evicting pods")
+	fs.BoolVar(&o.CheckCPUManager, "eviction-cpu-system-check-cpumanager", o.CheckCPUManager,
+		"set true to check the kubelet CPUManager policy; if CPUManager is on, guaranteed pods will be filtered out when collecting metrics and evicting pods")
+	fs.Var(&o.RankingLabels, "eviction-cpu-system-ranking-labels", "custom ranking labels; later label values in the array have higher eviction precedence")
+}
+
+func (o *CPUSystemPressureEvictionOptions) ApplyTo(c *eviction.CPUSystemPressureEvictionPluginConfiguration) error {
+	c.EnableCPUSystemEviction = o.EnableCPUSystemEviction
+	c.SystemLoadUpperBoundRatio = o.SystemLoadUpperBoundRatio
+	c.SystemLoadLowerBoundRatio = o.SystemLoadLowerBoundRatio
+	c.SystemUsageUpperBoundRatio = o.SystemUsageUpperBoundRatio
+	c.SystemUsageLowerBoundRatio = o.SystemUsageLowerBoundRatio
+	c.ThresholdMetPercentage = o.ThresholdMetPercentage
+	c.MetricRingSize = o.MetricRingSize
+	c.EvictionCoolDownTime = o.EvictionCoolDownTime
+	c.EvictionRankingMetrics = o.EvictionRankingMetrics
+	c.GracePeriod = o.GracePeriod
+	c.CheckCPUManager = o.CheckCPUManager
+	c.RankingLabels = o.RankingLabels
+	return nil
+}
+
+// StringToSlice is a flag value holding a JSON object that maps a label key to an
+// ordered list of label values, e.g. {"labelKey":["v1","v2"]}.
+type StringToSlice map[string][]string
+
+func (s *StringToSlice) String() string {
+	res, err := json.Marshal(s)
+	if err != nil {
+		return ""
+	}
+	return string(res)
+}
+
+func (s *StringToSlice) Set(value string) error {
+	err := json.Unmarshal([]byte(value), s)
+	return err
+}
+
+func (s *StringToSlice) Type() string {
+	return "stringToSlice" 
+} diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/eviction_base.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/eviction_base.go index d07444ece0..145ff39f90 100644 --- a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/eviction_base.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/eviction_base.go @@ -33,6 +33,7 @@ type EvictionOptions struct { *ReclaimedResourcesEvictionOptions *SystemLoadPressureEvictionOptions *RootfsPressureEvictionOptions + *CPUSystemPressureEvictionOptions } func NewEvictionOptions() *EvictionOptions { @@ -42,6 +43,7 @@ func NewEvictionOptions() *EvictionOptions { ReclaimedResourcesEvictionOptions: NewReclaimedResourcesEvictionOptions(), SystemLoadPressureEvictionOptions: NewSystemLoadPressureEvictionOptions(), RootfsPressureEvictionOptions: NewRootfsPressureEvictionOptions(), + CPUSystemPressureEvictionOptions: NewCPUSystemPressureEvictionOptions(), } } @@ -55,6 +57,7 @@ func (o *EvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.ReclaimedResourcesEvictionOptions.AddFlags(fss) o.SystemLoadPressureEvictionOptions.AddFlags(fss) o.RootfsPressureEvictionOptions.AddFlags(fss) + o.CPUSystemPressureEvictionOptions.AddFlags(fss) } func (o *EvictionOptions) ApplyTo(c *eviction.EvictionConfiguration) error { @@ -65,5 +68,6 @@ func (o *EvictionOptions) ApplyTo(c *eviction.EvictionConfiguration) error { errList = append(errList, o.ReclaimedResourcesEvictionOptions.ApplyTo(c.ReclaimedResourcesEvictionConfiguration)) errList = append(errList, o.SystemLoadPressureEvictionOptions.ApplyTo(c.SystemLoadEvictionPluginConfiguration)) errList = append(errList, o.RootfsPressureEvictionOptions.ApplyTo(c.RootfsPressureEvictionConfiguration)) + errList = append(errList, o.CPUSystemPressureEvictionOptions.ApplyTo(c.CPUSystemPressureEvictionPluginConfiguration)) return errors.NewAggregate(errList) } diff --git a/go.mod b/go.mod index fb2c058cfd..f22dc1f1e5 100644 --- a/go.mod +++ b/go.mod @@ -151,6 +151,7 @@ require ( ) replace ( + github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240321082255-89348b7117ac k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index b437f7b867..1faafaf514 100644 --- a/go.sum +++ b/go.sum @@ -82,6 +82,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= +github.com/WangZzzhe/katalyst-api v0.0.0-20240321082255-89348b7117ac h1:rt3Cy0yd/2f6I9+4rRZQkKV0V98txao7fZ8fYVOP3FA= +github.com/WangZzzhe/katalyst-api v0.0.0-20240321082255-89348b7117ac/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -554,8 +556,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod 
h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.4.1-0.20240315044944-45cdd48ceedc h1:KD5OnzzF1B44TpV2W+nTqCghwW7jlCqjfCZ94z6QWLg= -github.com/kubewharf/katalyst-api v0.4.1-0.20240315044944-45cdd48ceedc/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4= github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= diff --git a/pkg/agent/evictionmanager/manager.go b/pkg/agent/evictionmanager/manager.go index df162f8667..ecfc1185b9 100644 --- a/pkg/agent/evictionmanager/manager.go +++ b/pkg/agent/evictionmanager/manager.go @@ -38,6 +38,7 @@ import ( pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" endpointpkg "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/endpoint" "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin" + "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/cpu" "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/memory" "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/resource" "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/rootfs" @@ -123,6 +124,7 @@ func NewInnerEvictionPluginInitializers() map[string]plugin.InitFunc { innerEvictionPluginInitializers[memory.EvictionPluginNameSystemMemoryPressure] = memory.NewSystemPressureEvictionPlugin innerEvictionPluginInitializers[memory.EvictionPluginNameRssOveruse] = memory.NewRssOveruseEvictionPlugin innerEvictionPluginInitializers[rootfs.EvictionPluginNamePodRootfsPressure] = rootfs.NewPodRootfsPressureEvictionPlugin + innerEvictionPluginInitializers[cpu.EvictionPluginNameSystemCPUPressure] = cpu.NewCPUSystemPressureEvictionPlugin return innerEvictionPluginInitializers } diff --git a/pkg/agent/evictionmanager/plugin/cpu/system_pressure.go b/pkg/agent/evictionmanager/plugin/cpu/system_pressure.go new file mode 100644 index 0000000000..896962bac8 --- /dev/null +++ b/pkg/agent/evictionmanager/plugin/cpu/system_pressure.go @@ -0,0 +1,557 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package cpu
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"sync"
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/tools/events"
+	"k8s.io/klog/v2"
+	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
+
+	"github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
+	"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin"
+	"github.com/kubewharf/katalyst-core/pkg/client"
+	"github.com/kubewharf/katalyst-core/pkg/config"
+	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
+	evictionconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
+	"github.com/kubewharf/katalyst-core/pkg/consts"
+	"github.com/kubewharf/katalyst-core/pkg/metaserver"
+	"github.com/kubewharf/katalyst-core/pkg/metrics"
+	"github.com/kubewharf/katalyst-core/pkg/util/general"
+	"github.com/kubewharf/katalyst-core/pkg/util/metric"
+	"github.com/kubewharf/katalyst-core/pkg/util/native"
+	"github.com/kubewharf/katalyst-core/pkg/util/process"
+)
+
+const (
+	EvictionPluginNameSystemCPUPressure = "system-cpu-pressure-eviction-plugin"
+	EvictionConditionSystemCPU          = "SystemCPU"
+	FakeLabelMetricPrefix               = "label"
+	FakeLabelMetricSeparator            = "."
+)
+
+var (
+	podMetrics = []string{consts.MetricLoad1MinContainer, consts.MetricCPUUsageContainer}
+)
+
+type entries map[string]*metric.MetricRing
+
+type SystemPressureEvictionPlugin struct {
+	pluginName string
+
+	sync.Mutex
+	*process.StopControl
+
+	emitter     metrics.MetricEmitter
+	metaServer  *metaserver.MetaServer
+	dynamicConf *dynamic.DynamicAgentConfiguration
+	conf        *config.Configuration
+
+	podMetricsHistory  map[string]entries
+	nodeMetricsHistory entries
+
+	nodeCapacity     int
+	overMetricName   string
+	lastEvictionTime time.Time
+	syncPeriod       time.Duration
+}
+
+func NewCPUSystemPressureEvictionPlugin(
+	_ *client.GenericClientSet,
+	_ events.EventRecorder,
+	metaServer *metaserver.MetaServer,
+	emitter metrics.MetricEmitter,
+	conf *config.Configuration) plugin.EvictionPlugin {
+	p := &SystemPressureEvictionPlugin{
+		StopControl:  process.NewStopControl(time.Time{}),
+		pluginName:   EvictionPluginNameSystemCPUPressure,
+		emitter:      emitter,
+		metaServer:   metaServer,
+		syncPeriod:   conf.EvictionManagerSyncPeriod,
+		nodeCapacity: metaServer.NumCPUs, // get cpu capacity from NumCPUs; allocatable may be mutated by the overcommit webhook
+		conf:         conf,
+		dynamicConf:  conf.DynamicAgentConfiguration,
+	}
+
+	return p
+}
+
+func (s *SystemPressureEvictionPlugin) Start() {
+	general.Infof("%s", s.Name())
+	go wait.UntilWithContext(context.TODO(), s.collectMetrics, s.syncPeriod)
+}
+
+func (s *SystemPressureEvictionPlugin) Name() string {
+	return s.pluginName
+}
+
+func (s *SystemPressureEvictionPlugin) collectMetrics(_ context.Context) {
+	timeout, cancel := context.WithTimeout(context.Background(), time.Second)
+	defer cancel()
+	podList, err := s.metaServer.GetPodList(timeout, native.PodIsActive)
+	if err != nil {
+		klog.Errorf("getPodList fail: %v", err)
+		return
+	}
+
+	var (
+		collectTime = time.Now().UnixNano()
+		nodeMetric  = make(map[string]float64)
+		capacity    = s.nodeCapacity
+	)
+
+	if s.dynamicConf.GetDynamicConfiguration().CheckCPUManager {
+		cpuManagerOn, err := s.checkKubeletCPUManager()
+		if err != nil {
+			klog.Errorf("checkKubeletCPUManager fail: %v", err)
+			return
+		}
+		if cpuManagerOn {
+			// if cpuManagerOn, only burstable and besteffort pods will be collected and evicted
+			podList, capacity, err = 
filterGuaranteedPods(podList, capacity) + if err != nil { + klog.Errorf("filterGuaranteedPods fail: %v", err) + return + } + } + } + + s.Lock() + defer s.Unlock() + + // clear expired pod metrics + s.clearExpiredMetricsHistory(podList) + + // store pod metrics for sort in ThresholdMet + for _, pod := range podList { + for _, metricName := range s.mergeCollectMetrics() { + metricData := s.metaServer.AggregatePodMetric([]*v1.Pod{pod}, metricName, metric.AggregatorSum, metric.DefaultContainerMetricFilter) + + snapshot := &metric.MetricSnapshot{ + Info: metric.MetricInfo{ + Name: metricName, + Value: metricData.Value, + }, + Time: collectTime, + } + s.pushMetric(metricName, string(pod.UID), snapshot) + + nodeMetric[metricName] += metricData.Value + } + } + + // store node metrics + for _, metricName := range podMetrics { + if metricVal, ok := nodeMetric[metricName]; ok { + s.pushNodeMetric(s.dynamicConf.GetDynamicConfiguration(), capacity, metricName, metricVal, collectTime) + klog.V(6).Infof("collectMetrics, metricName: %v, metricVal: %v, capacity: %v", metricName, metricVal, capacity) + } + } +} + +func (s *SystemPressureEvictionPlugin) ThresholdMet(_ context.Context) (*v1alpha1.ThresholdMetResponse, error) { + s.Lock() + defer s.Unlock() + + dynamicConfig := s.dynamicConf.GetDynamicConfiguration() + if !dynamicConfig.EnableCPUSystemEviction { + return &v1alpha1.ThresholdMetResponse{ + MetType: v1alpha1.ThresholdMetType_NOT_MET, + }, nil + } + + var ( + softOverResp *v1alpha1.ThresholdMetResponse + ) + + for _, metricName := range podMetrics { + metricCache, ok := s.nodeMetricsHistory[metricName] + if !ok { + klog.Warningf("metric %v history not found", metricName) + continue + } + + softOverCount, hardOverCount := metricCache.Count() + + softOverRatio := float64(softOverCount) / float64(metricCache.MaxLen) + hardOverRatio := float64(hardOverCount) / float64(metricCache.MaxLen) + + softOver := softOverRatio >= dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.ThresholdMetPercentage + hardOver := hardOverRatio >= dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.ThresholdMetPercentage + + klog.V(6).Infof("cpu system eviction, metric: %v, softOverCount: %v,"+ + "hardOverCount: %v, softOverRatio: %v, hardOverRatio: %v, "+ + "threshold: %v", metricName, softOverCount, hardOverCount, softOverRatio, hardOverRatio, + dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.ThresholdMetPercentage) + + if hardOver { + s.overMetricName = metricName + return &v1alpha1.ThresholdMetResponse{ + ThresholdValue: hardOverRatio, + ObservedValue: dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.ThresholdMetPercentage, + ThresholdOperator: v1alpha1.ThresholdOperator_GREATER_THAN, + MetType: v1alpha1.ThresholdMetType_HARD_MET, + EvictionScope: metricName, + Condition: &v1alpha1.Condition{ + ConditionType: v1alpha1.ConditionType_NODE_CONDITION, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + }, nil + } + + if softOver && softOverResp == nil { + softOverResp = &v1alpha1.ThresholdMetResponse{ + ThresholdValue: softOverRatio, + ObservedValue: dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.ThresholdMetPercentage, + ThresholdOperator: v1alpha1.ThresholdOperator_GREATER_THAN, + MetType: v1alpha1.ThresholdMetType_SOFT_MET, + EvictionScope: metricName, + Condition: &v1alpha1.Condition{ + ConditionType: v1alpha1.ConditionType_NODE_CONDITION, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + 
ConditionName: EvictionConditionSystemCPU,
+					MetCondition:  true,
+				},
+			}
+		}
+	}
+	s.overMetricName = ""
+	if softOverResp != nil {
+		return softOverResp, nil
+	}
+	return &v1alpha1.ThresholdMetResponse{
+		MetType: v1alpha1.ThresholdMetType_NOT_MET,
+	}, nil
+}
+
+func (s *SystemPressureEvictionPlugin) GetTopEvictionPods(
+	_ context.Context,
+	request *v1alpha1.GetTopEvictionPodsRequest,
+) (*v1alpha1.GetTopEvictionPodsResponse, error) {
+	if request == nil {
+		return nil, fmt.Errorf("GetTopEvictionPods got nil request")
+	}
+	if len(request.ActivePods) == 0 {
+		klog.Warningf("cpu system eviction got empty active pods list")
+		return &v1alpha1.GetTopEvictionPodsResponse{}, nil
+	}
+	candidatePods := request.ActivePods
+	if s.dynamicConf.GetDynamicConfiguration().CheckCPUManager {
+		cpuManagerOn, err := s.checkKubeletCPUManager()
+		if err != nil {
+			err = fmt.Errorf("checkKubeletCPUManager fail: %v", err)
+			klog.Error(err)
+			return nil, err
+		}
+		if cpuManagerOn {
+			// if cpuManagerOn, only burstable and besteffort pods will be collected and evicted
+			candidatePods = native.FilterPods(request.GetActivePods(), func(pod *v1.Pod) (bool, error) {
+				if pod == nil {
+					return false, fmt.Errorf("FilterPods got nil pod")
+				}
+				if native.PodGuaranteedCPUs(pod) == 0 {
+					return true, nil
+				}
+				return false, nil
+			})
+		}
+	}
+
+	var (
+		topN          = general.MinUInt64(request.TopN, uint64(len(candidatePods)))
+		now           = time.Now()
+		dynamicConfig = s.dynamicConf.GetDynamicConfiguration()
+		topNPods      = make([]*v1.Pod, 0)
+	)
+
+	s.Lock()
+	defer s.Unlock()
+
+	if !dynamicConfig.EnableCPUSystemEviction {
+		klog.Warningf("EnableCPUSystemEviction off, return")
+		return &v1alpha1.GetTopEvictionPodsResponse{}, nil
+	}
+	if s.overMetricName == "" {
+		klog.Warningf("cpu system eviction without over metric name, skip")
+		return &v1alpha1.GetTopEvictionPodsResponse{}, nil
+	}
+
+	if !(s.lastEvictionTime.IsZero() || now.Sub(s.lastEvictionTime) >= dynamicConfig.EvictionCoolDownTime) {
+		general.Infof("in eviction cool-down time, skip eviction. 
now: %s, lastEvictionTime: %s", + now.String(), s.lastEvictionTime.String()) + return &v1alpha1.GetTopEvictionPodsResponse{}, nil + } + s.lastEvictionTime = now + + // sort pod by eviction cmp functions + general.NewMultiSorter(s.getEvictionCmpFunc()...).Sort(native.NewPodSourceImpList(candidatePods)) + + // get topN from sorted podlist + for i := 0; uint64(i) < topN; i++ { + topNPods = append(topNPods, candidatePods[i]) + } + + resp := &v1alpha1.GetTopEvictionPodsResponse{ + TargetPods: topNPods, + } + if dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.GracePeriod > 0 { + resp.DeletionOptions = &v1alpha1.DeletionOptions{ + GracePeriodSeconds: dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.GracePeriod, + } + } + + s.overMetricName = "" + return resp, nil +} + +func (s *SystemPressureEvictionPlugin) GetEvictPods( + _ context.Context, + _ *v1alpha1.GetEvictPodsRequest, +) (*v1alpha1.GetEvictPodsResponse, error) { + return &v1alpha1.GetEvictPodsResponse{}, nil +} + +func (s *SystemPressureEvictionPlugin) pushMetric(metricName, podUID string, snapshot *metric.MetricSnapshot) { + if s.podMetricsHistory == nil { + s.podMetricsHistory = map[string]entries{} + } + if _, ok := s.podMetricsHistory[metricName]; !ok { + s.podMetricsHistory[metricName] = map[string]*metric.MetricRing{} + } + if _, ok := s.podMetricsHistory[metricName][podUID]; !ok { + s.podMetricsHistory[metricName][podUID] = metric.CreateMetricRing(s.dynamicConf.GetDynamicConfiguration().MetricRingSize) + } + + s.podMetricsHistory[metricName][podUID].Push(snapshot) +} + +func (s *SystemPressureEvictionPlugin) pushNodeMetric(dynamicConfig *dynamic.Configuration, nodeCapacity int, metricName string, value float64, collectTime int64) { + if s.nodeMetricsHistory == nil { + s.nodeMetricsHistory = map[string]*metric.MetricRing{} + } + if _, ok := s.nodeMetricsHistory[metricName]; !ok { + s.nodeMetricsHistory[metricName] = metric.CreateMetricRing(s.dynamicConf.GetDynamicConfiguration().MetricRingSize) + } + var upperBound, lowerBound float64 + switch metricName { + case consts.MetricLoad1MinContainer: + upperBound = dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.SystemLoadUpperBoundRatio + lowerBound = dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.SystemLoadLowerBoundRatio + case consts.MetricCPUUsageContainer: + upperBound = dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.SystemUsageUpperBoundRatio + lowerBound = dynamicConfig.CPUSystemPressureEvictionPluginConfiguration.SystemUsageLowerBoundRatio + default: + klog.Errorf("unexpected metricName: %v", metricName) + return + } + + snapshot := &metric.MetricSnapshot{ + Info: metric.MetricInfo{ + Name: metricName, + Value: value, + UpperBound: float64(nodeCapacity) * upperBound, + LowerBound: float64(nodeCapacity) * lowerBound, + }, + Time: collectTime, + } + s.nodeMetricsHistory[metricName].Push(snapshot) +} + +func (s *SystemPressureEvictionPlugin) getEvictionCmpFunc() []general.CmpFunc { + dynamicConfig := s.dynamicConf.GetDynamicConfiguration() + cmpFuncs := make([]general.CmpFunc, 0) + rankingMetrics := dynamicConfig.EvictionRankingMetrics + if !sets.NewString(rankingMetrics...).Has(s.overMetricName) { + rankingMetrics = append(rankingMetrics, s.overMetricName) + } + + for _, m := range rankingMetrics { + currentMetric := m + cmpFuncs = append(cmpFuncs, func(i1, i2 interface{}) int { + p1, p2 := i1.(*v1.Pod), i2.(*v1.Pod) + switch currentMetric { + case evictionconfig.FakeMetricQoSLevel: + return s.cmpKatalystQoS(p1, p2) + case 
evictionconfig.FakeMetricPriority: + return general.ReverseCmpFunc(native.PodPriorityCmpFunc)(p1, p2) + case evictionconfig.FakeMetricNativeQoSLevel: + return native.PodQoSCmpFunc(p1, p2) + case evictionconfig.FakeMetricOwnerLevel: + return native.PodOwnerCmpFunc(p1, p2) + default: + if labelKey, ok := s.splitKeyFromFakeLabelMetric(currentMetric); ok { + return s.cmpSpecifiedLabels(p1, p2, labelKey) + } else { + return s.cmpMetric(currentMetric, p1, p2) + } + } + }) + } + + return cmpFuncs +} + +func (s *SystemPressureEvictionPlugin) cmpMetric(metricName string, p1, p2 *v1.Pod) int { + return general.CmpFloat64(s.getPodMetricHistory(metricName, p1), s.getPodMetricHistory(metricName, p2)) +} + +func (s *SystemPressureEvictionPlugin) cmpKatalystQoS(p1, p2 *v1.Pod) int { + p1Reclaim, err1 := s.conf.CheckReclaimedQoSForPod(p1) + p2Reclaim, err2 := s.conf.CheckReclaimedQoSForPod(p2) + if err1 != nil || err2 != nil { + return general.CmpError(err1, err2) + } + + return general.CmpBool(p1Reclaim, p2Reclaim) +} + +func (s *SystemPressureEvictionPlugin) cmpSpecifiedLabels(p1, p2 *v1.Pod, labelKey string) int { + vals, ok := s.dynamicConf.GetDynamicConfiguration().RankingLabels[labelKey] + if !ok { + return 0 + } + p1Val, ok := p1.Labels[labelKey] + if !ok { + return -1 + } + p2Val, ok := p2.Labels[labelKey] + if !ok { + return 1 + } + + valsScoreMap := make(map[string]int32) + for i, val := range vals { + valsScoreMap[val] = int32(i + 1) + } + return general.CmpInt32(valsScoreMap[p1Val], valsScoreMap[p2Val]) +} + +func (s *SystemPressureEvictionPlugin) splitKeyFromFakeLabelMetric(metricName string) (string, bool) { + keys := strings.SplitN(metricName, FakeLabelMetricSeparator, 2) + if len(keys) <= 1 { + return "", false + } + + if keys[0] != FakeLabelMetricPrefix { + return "", false + } + + return keys[1], true +} + +func (s *SystemPressureEvictionPlugin) mergeCollectMetrics() []string { + metrics := sets.NewString(podMetrics...) 
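+	// ranking metrics that are fake metrics (katalyst/native qos level, priority, owner)
+	// or label-based metrics ("label.<key>") are only used for sorting, so they are skipped
+	// below; any other ranking metric is treated as a real container metric and is collected
+	// together with load and usage so that its history is available in GetTopEvictionPods.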
+ for _, metricName := range s.dynamicConf.GetDynamicConfiguration().EvictionRankingMetrics { + switch metricName { + case evictionconfig.FakeMetricOwnerLevel, evictionconfig.FakeMetricPriority, evictionconfig.FakeMetricQoSLevel, evictionconfig.FakeMetricNativeQoSLevel: + continue + default: + if _, ok := s.splitKeyFromFakeLabelMetric(metricName); ok { + continue + } + + if !metrics.Has(metricName) { + metrics.Insert(metricName) + } + } + } + return metrics.UnsortedList() +} + +// get sum of pod metrics history in metric ring cache +func (s *SystemPressureEvictionPlugin) getPodMetricHistory(metricName string, pod *v1.Pod) float64 { + podUID := string(pod.UID) + if _, ok := s.podMetricsHistory[metricName]; !ok { + return 0 + } + + if _, ok := s.podMetricsHistory[metricName][podUID]; !ok { + return 0 + } + + return s.podMetricsHistory[metricName][podUID].Sum() +} + +// clear expired pod metrics without lock +func (s *SystemPressureEvictionPlugin) clearExpiredMetricsHistory(podList []*v1.Pod) { + podSet := sets.NewString() + for _, pod := range podList { + podSet.Insert(string(pod.UID)) + } + + for metricName, entries := range s.podMetricsHistory { + for podUID := range entries { + if !podSet.Has(podUID) { + delete(s.podMetricsHistory[metricName], podUID) + } + } + } +} + +func (s *SystemPressureEvictionPlugin) checkKubeletCPUManager() (bool, error) { + timeout, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + kubeletConfig, err := s.metaServer.GetKubeletConfig(timeout) + if err != nil { + return false, err + } + + featureGates := kubeletConfig.FeatureGates + on, ok := featureGates[string(features.CPUManager)] + cpuManagerPolicy := kubeletConfig.CPUManagerPolicy + + if (ok && !on) || (cpuManagerPolicy == string(cpumanager.PolicyNone)) { + return false, nil + } + + return true, nil +} + +// filterGuaranteedPods returns podList without guaranteed pods and capacity without guaranteed pods requests +func filterGuaranteedPods(podList []*v1.Pod, nodeCapacity int) ([]*v1.Pod, int, error) { + var ( + res = make([]*v1.Pod, 0) + cpus = 0 + ) + for _, pod := range podList { + guaranteedCPU := native.PodGuaranteedCPUs(pod) + if guaranteedCPU == 0 { + res = append(res, pod) + } else { + cpus += native.PodGuaranteedCPUs(pod) + } + } + + if cpus > nodeCapacity { + return res, 0, fmt.Errorf("guaranteed pod cpu request is greater than node capacity, cpus: %v, capacity: %v", + cpus, nodeCapacity) + } + + return res, nodeCapacity - cpus, nil +} diff --git a/pkg/agent/evictionmanager/plugin/cpu/system_pressure_test.go b/pkg/agent/evictionmanager/plugin/cpu/system_pressure_test.go new file mode 100644 index 0000000000..e410ed17ff --- /dev/null +++ b/pkg/agent/evictionmanager/plugin/cpu/system_pressure_test.go @@ -0,0 +1,1044 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cpu + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubelet/config/v1beta1" + "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + + "github.com/kubewharf/katalyst-api/pkg/consts" + evictionpluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos" + "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction" + consts2 "github.com/kubewharf/katalyst-core/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/kubeletconfig" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric" + metrictypes "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/types" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" + utilmetric "github.com/kubewharf/katalyst-core/pkg/util/metric" + "github.com/kubewharf/katalyst-core/pkg/util/native" +) + +var ( + highPriority int32 = 10000 + lowPriority int32 = 100 +) + +func TestCmpKatalystQoS(t *testing.T) { + t.Parallel() + podList := []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "p1", + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "p2", + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "p3", + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + }, + }, + }, + } + conf := config.NewConfiguration() + s := &SystemPressureEvictionPlugin{ + conf: conf, + } + + general.NewMultiSorter(func(i1, i2 interface{}) int { + return s.cmpKatalystQoS(i1.(*v1.Pod), i2.(*v1.Pod)) + }).Sort(native.NewPodSourceImpList(podList)) + + res := make([]string, 0) + for _, pod := range podList { + res = append(res, pod.Name) + } + assert.Equal(t, []string{"p3", "p1", "p2"}, res) +} + +func TestCmpSpecifiedLabels(t *testing.T) { + t.Parallel() + podList := []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "p1", + Labels: map[string]string{ + "testLabel": "label1", + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "p3", + Labels: map[string]string{ + "testLabel": "label3", + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "p2", + Labels: map[string]string{ + "testLabel": "label2", + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "p4", + Labels: map[string]string{}, + }, + }, + } + + conf := config.NewConfiguration() + conf.DynamicAgentConfiguration = dynamic.NewDynamicAgentConfiguration() + conf.DynamicAgentConfiguration.SetDynamicConfiguration( + &dynamic.Configuration{ + AdminQoSConfiguration: &adminqos.AdminQoSConfiguration{ + EvictionConfiguration: &eviction.EvictionConfiguration{ + CPUSystemPressureEvictionPluginConfiguration: &eviction.CPUSystemPressureEvictionPluginConfiguration{ + RankingLabels: 
map[string][]string{ + "testLabel": { + "label3", "label2", "label1", + }, + }, + }, + }, + }, + }, + ) + s := &SystemPressureEvictionPlugin{ + conf: conf, + dynamicConf: conf.DynamicAgentConfiguration, + } + general.NewMultiSorter(func(i1, i2 interface{}) int { + return s.cmpSpecifiedLabels(i1.(*v1.Pod), i2.(*v1.Pod), "testLabel") + }).Sort(native.NewPodSourceImpList(podList)) + + res := make([]string, 0) + for _, pod := range podList { + res = append(res, pod.Name) + } + assert.Equal(t, []string{"p4", "p1", "p2", "p3"}, res) + +} + +func TestFilterGuaranteedPods(t *testing.T) { + t.Parallel() + + podList := []*v1.Pod{ + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + }, + }, + }, + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "c1", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + { + Name: "c2", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + }, + }, + }, + }, + }, + }, + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + }, + }, + }, + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + }, + }, + }, + }, + }, + }, + { + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + }, + } + + podList, capacity, err := filterGuaranteedPods(podList, 32) + assert.NoError(t, err) + + assert.Equal(t, 3, len(podList)) + assert.Equal(t, 20, capacity) +} + +func TestThresholdMet(t *testing.T) { + t.Parallel() + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 1, 2) + require.NoError(t, err) + + for _, tc := range []struct { + name string + metrics map[string]map[string]float64 + podList []*v1.Pod + conf *config.Configuration + expectCondition *evictionpluginapi.Condition + expectScope string + expectPodName string + }{ + { + name: "not enable", + conf: makeConf(false, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{}, -1, false, 
map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 0.5, + "pod2": 1.5, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 1, + "pod2": 2, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: nil, + expectScope: "", + expectPodName: "", + }, + { + name: "not met", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 0.5, + "pod2": 1.5, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 1, + "pod2": 2, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: nil, + expectScope: "", + expectPodName: "", + }, + { + name: "usage met soft without check CPU manager", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 0.5, + "pod2": 1.5, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 4, + "pod2": 5, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricCPUUsageContainer, + expectPodName: "", + }, + { + name: "both usage and load met soft without check CPU manager", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 9, + "pod2": 8, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 4, + "pod2": 5, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricLoad1MinContainer, + expectPodName: "", + }, + { + name: "both usage and load hard soft without check CPU manager", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{}, -1, false, 
map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 18, + "pod2": 16, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 5, + "pod2": 6, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricLoad1MinContainer, + expectPodName: "pod1", + }, + { + name: "usage hard soft without check CPU manager, sort by QoS", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 1, + "pod2": 1, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 6, + "pod2": 5, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricCPUUsageContainer, + expectPodName: "pod2", + }, + { + name: "usage hard soft without check CPU manager, sort by priority", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 1, + "pod2": 1, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 6, + "pod2": 5, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Priority: &highPriority, + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Priority: &lowPriority, + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricCPUUsageContainer, + expectPodName: "pod2", + }, + { + name: "usage hard soft without check CPU manager, sort by custom metrics", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, 
[]string{eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority, consts2.MetricMemRssContainer}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 1, + "pod2": 1, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 6, + "pod2": 5, + }, + consts2.MetricMemRssContainer: { + "pod1": 99, + "pod2": 100, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricCPUUsageContainer, + expectPodName: "pod2", + }, + { + name: "usage hard soft without check CPU manager, sort by labels", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{eviction.FakeMetricQoSLevel, eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority, "label.testLabel", consts2.MetricMemRssContainer}, -1, false, + map[string][]string{ + "testLabel": { + "label3", "label2", "label1", + }, + }), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 1, + "pod2": 1, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 6, + "pod2": 5, + }, + consts2.MetricMemRssContainer: { + "pod1": 99, + "pod2": 100, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + Labels: map[string]string{ + "testLabel": "label1", + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + Labels: map[string]string{ + "testLabel": "label2", + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricCPUUsageContainer, + expectPodName: "pod1", + }, + { + name: "usage hard soft without check CPU manager, sort by owner", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{eviction.FakeMetricOwnerLevel, eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority}, -1, false, map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 1, + "pod2": 1, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 5, + "pod2": 6, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + OwnerReferences: []v12.OwnerReference{ + { + Name: "test", + Kind: "DaemonSet", + }, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: 
consts2.MetricCPUUsageContainer, + expectPodName: "pod1", + }, + { + name: "not met with check CPU manager", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority}, -1, true, + map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 1, + "pod2": 1, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 6, + "pod2": 5, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: nil, + expectScope: "", + expectPodName: "", + }, + { + name: "hard met with check CPU manager", + conf: makeConf(true, 2, 1, 0.6, 0.5, + 0.8, 10, 5*time.Minute, []string{eviction.FakeMetricNativeQoSLevel, eviction.FakeMetricPriority}, -1, true, + map[string][]string{}), + metrics: map[string]map[string]float64{ + consts2.MetricLoad1MinContainer: { + "pod1": 17, + "pod2": 17, + }, + consts2.MetricCPUUsageContainer: { + "pod1": 6, + "pod2": 5, + }, + }, + podList: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fakeContainer", + }, + }, + }, + }, + }, + expectCondition: &evictionpluginapi.Condition{ + ConditionType: 0, + Effects: []string{string(v1.TaintEffectNoSchedule)}, + ConditionName: EvictionConditionSystemCPU, + MetCondition: true, + }, + expectScope: consts2.MetricLoad1MinContainer, + expectPodName: "pod2", + }, + } { + t.Run(tc.name, func(t *testing.T) { + now := time.Now() + metaServer := makeMetaServer(metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}), cpuTopology) + + // set pod list + metaServer.PodFetcher = &pod.PodFetcherStub{ + PodList: tc.podList, + } + fakeMetricsFetcher := metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}).(*metric.FakeMetricsFetcher) + // set fake metrics + for _, pod := range tc.podList { + for metricName, podMetrics := range tc.metrics { + if val, ok := podMetrics[string(pod.UID)]; ok { + fakeMetricsFetcher.SetContainerMetric(string(pod.UID), "fakeContainer", metricName, + utilmetric.MetricData{Value: val, Time: &now}) + } + } + } + metaServer.MetricsFetcher = fakeMetricsFetcher + metaServer.KubeletConfigFetcher = kubeletconfig.NewFakeKubeletConfigFetcher(v1beta1.KubeletConfiguration{ + CPUManagerPolicy: string(cpumanager.PolicyStatic), + }) + + // collect metrics + ep := 
NewCPUSystemPressureEvictionPlugin(nil, nil, metaServer, nil, tc.conf).(*SystemPressureEvictionPlugin) + for i := 0; i < tc.conf.DynamicAgentConfiguration.GetDynamicConfiguration().MetricRingSize; i++ { + ep.collectMetrics(context.TODO()) + } + + // check threshold met + resp, err := ep.ThresholdMet(context.TODO()) + require.NoError(t, err) + if tc.expectCondition == nil { + require.Nil(t, resp.Condition) + } else { + require.Equal(t, *(tc.expectCondition), *(resp.Condition)) + } + require.Equal(t, tc.expectScope, resp.EvictionScope) + + if resp.MetType == 2 { + podResp, err := ep.GetTopEvictionPods(context.TODO(), &evictionpluginapi.GetTopEvictionPodsRequest{ + ActivePods: tc.podList, + TopN: 1, + EvictionScope: resp.EvictionScope, + }) + require.NoError(t, err) + assert.Equal(t, tc.expectPodName, podResp.TargetPods[0].Name) + } + }) + } +} + +func TestName(t *testing.T) { + p := &SystemPressureEvictionPlugin{ + pluginName: EvictionPluginNameSystemCPUPressure, + } + assert.Equal(t, EvictionPluginNameSystemCPUPressure, p.Name()) +} + +func makeMetaServer(metricsFetcher metrictypes.MetricsFetcher, cpuTopology *machine.CPUTopology) *metaserver.MetaServer { + metaServer := &metaserver.MetaServer{ + MetaAgent: &agent.MetaAgent{}, + } + + metaServer.MetricsFetcher = metricsFetcher + metaServer.KatalystMachineInfo = &machine.KatalystMachineInfo{ + CPUTopology: cpuTopology, + } + + return metaServer +} + +func makeConf(enable bool, systemLoadUpperBoundRatio, systemLoadLowerBoundRatio, + systemUsageUpperBoundRatio, systemUsageLowerBoundRatio, thresholdMetPercentage float64, + metricRingSize int, evictionCoolDownTime time.Duration, evictionRankingMetrics []string, + gracePeriod int64, checkCPUManager bool, rankingLabelVals map[string][]string) *config.Configuration { + conf := config.NewConfiguration() + conf.GetDynamicConfiguration().EnableCPUSystemEviction = enable + conf.GetDynamicConfiguration().SystemLoadUpperBoundRatio = systemLoadUpperBoundRatio + conf.GetDynamicConfiguration().SystemLoadLowerBoundRatio = systemLoadLowerBoundRatio + conf.GetDynamicConfiguration().SystemUsageUpperBoundRatio = systemUsageUpperBoundRatio + conf.GetDynamicConfiguration().SystemUsageLowerBoundRatio = systemUsageLowerBoundRatio + conf.GetDynamicConfiguration().CPUSystemPressureEvictionPluginConfiguration.ThresholdMetPercentage = thresholdMetPercentage + conf.GetDynamicConfiguration().MetricRingSize = metricRingSize + conf.GetDynamicConfiguration().EvictionCoolDownTime = evictionCoolDownTime + conf.GetDynamicConfiguration().EvictionRankingMetrics = evictionRankingMetrics + conf.GetDynamicConfiguration().CPUSystemPressureEvictionPluginConfiguration.GracePeriod = gracePeriod + conf.GetDynamicConfiguration().CheckCPUManager = checkCPUManager + conf.GetDynamicConfiguration().RankingLabels = rankingLabelVals + return conf +} diff --git a/pkg/agent/evictionmanager/plugin/memory/helper.go b/pkg/agent/evictionmanager/plugin/memory/helper.go index c485c8aca8..2388980cac 100644 --- a/pkg/agent/evictionmanager/plugin/memory/helper.go +++ b/pkg/agent/evictionmanager/plugin/memory/helper.go @@ -137,6 +137,10 @@ func (e *EvictionHelper) getEvictionCmpFuncs(rankingMetrics []string, numaID int case evictionconfig.FakeMetricPriority: // prioritize evicting the pod whose priority is lower return general.ReverseCmpFunc(native.PodPriorityCmpFunc)(p1, p2) + case evictionconfig.FakeMetricNativeQoSLevel: + return native.PodQoSCmpFunc(p1, p2) + case evictionconfig.FakeMetricOwnerLevel: + return native.PodOwnerCmpFunc(p1, p2) default: 
p1Metric, p1Err := helper.GetPodMetric(e.metaServer.MetricsFetcher, e.emitter, p1, currentMetric, numaID) p2Metric, p2Err := helper.GetPodMetric(e.metaServer.MetricsFetcher, e.emitter, p2, currentMetric, numaID) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go index 42dd4e717d..a2611cf665 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go @@ -40,6 +40,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" + "github.com/kubewharf/katalyst-core/pkg/util/metric" "github.com/kubewharf/katalyst-core/pkg/util/native" ) @@ -347,8 +348,8 @@ func (p *CPUPressureLoadEviction) collectMetrics(_ context.Context) { continue } - snapshot := &MetricSnapshot{ - Info: MetricInfo{ + snapshot := &metric.MetricSnapshot{ + Info: metric.MetricInfo{ Name: metricName, Value: m.Value, }, @@ -439,8 +440,8 @@ func (p *CPUPressureLoadEviction) accumulateSharedPoolsLimit() int { // and its upper-bound and lower-bound are calculated by pool size. func (p *CPUPressureLoadEviction) collectPoolLoad(dynamicConfig *dynamic.Configuration, pressureByPoolSize bool, metricName string, metricValue float64, poolName string, poolSize int, collectTime int64) { - snapshot := &MetricSnapshot{ - Info: MetricInfo{ + snapshot := &metric.MetricSnapshot{ + Info: metric.MetricInfo{ Name: metricName, Value: metricValue, UpperBound: float64(poolSize) * dynamicConfig.LoadUpperBoundRatio, @@ -481,8 +482,8 @@ func (p *CPUPressureLoadEviction) collectPoolLoad(dynamicConfig *dynamic.Configu // and its upper-bound and lower-bound are not defined. 
func (p *CPUPressureLoadEviction) collectPoolMetricDefault(dynamicConfig *dynamic.Configuration, _ bool, metricName string, metricValue float64, poolName string, _ int, collectTime int64) { - snapshot := &MetricSnapshot{ - Info: MetricInfo{ + snapshot := &metric.MetricSnapshot{ + Info: metric.MetricInfo{ Name: metricName, Value: metricValue, }, @@ -495,7 +496,7 @@ func (p *CPUPressureLoadEviction) collectPoolMetricDefault(dynamicConfig *dynami // pushMetric stores and push-in metric for the given pod func (p *CPUPressureLoadEviction) pushMetric(dynamicConfig *dynamic.Configuration, - metricName, entryName, subEntryName string, snapshot *MetricSnapshot) { + metricName, entryName, subEntryName string, snapshot *metric.MetricSnapshot) { if p.metricsHistory[metricName] == nil { p.metricsHistory[metricName] = make(Entries) } @@ -505,13 +506,13 @@ func (p *CPUPressureLoadEviction) pushMetric(dynamicConfig *dynamic.Configuratio } if p.metricsHistory[metricName][entryName][subEntryName] == nil { - p.metricsHistory[metricName][entryName][subEntryName] = CreateMetricRing(dynamicConfig.LoadMetricRingSize) + p.metricsHistory[metricName][entryName][subEntryName] = metric.CreateMetricRing(dynamicConfig.LoadMetricRingSize) } p.metricsHistory[metricName][entryName][subEntryName].Push(snapshot) } -func (p *CPUPressureLoadEviction) logPoolSnapShot(snapshot *MetricSnapshot, poolName string, withBound bool) { +func (p *CPUPressureLoadEviction) logPoolSnapShot(snapshot *metric.MetricSnapshot, poolName string, withBound bool) { if snapshot == nil { return } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go index 684f157c58..9a9fac4792 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go @@ -17,34 +17,13 @@ limitations under the License. 
package strategy import ( - "sync" - advisorapi "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor" "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/util/metric" ) -type MetricInfo struct { - Name string - Value float64 - UpperBound float64 - LowerBound float64 -} - -type MetricSnapshot struct { - Info MetricInfo - Time int64 -} - -type MetricRing struct { - MaxLen int - Queue []*MetricSnapshot - CurrentIndex int - - sync.RWMutex -} - // SubEntries is keyed by container name or empty string (for pool) -type SubEntries map[string]*MetricRing +type SubEntries map[string]*metric.MetricRing func (se SubEntries) IsPoolEntry() bool { return len(se) == 1 && se[advisorapi.FakedContainerName] != nil @@ -55,60 +34,3 @@ type Entries map[string]SubEntries type PoolMetricCollectHandler func(dynamicConfig *dynamic.Configuration, poolsUnderPressure bool, metricName string, metricValue float64, poolName string, poolSize int, collectTime int64) - -func (ring *MetricRing) Sum() float64 { - ring.RLock() - defer ring.RUnlock() - - sum := 0.0 - for _, snapshot := range ring.Queue { - if snapshot != nil { - sum += snapshot.Info.Value - } - } - return sum -} - -func (ring *MetricRing) Push(snapShot *MetricSnapshot) { - ring.Lock() - defer ring.Unlock() - - if ring.CurrentIndex != -1 && snapShot != nil { - latestSnapShot := ring.Queue[ring.CurrentIndex] - if latestSnapShot != nil && latestSnapShot.Time == snapShot.Time { - return - } - } - - ring.CurrentIndex = (ring.CurrentIndex + 1) % ring.MaxLen - ring.Queue[ring.CurrentIndex] = snapShot -} - -func (ring *MetricRing) Count() (softOverCount, hardOverCount int) { - ring.RLock() - defer ring.RUnlock() - - for _, snapshot := range ring.Queue { - if snapshot == nil { - continue - } - - if snapshot.Info.LowerBound > 0 { - if snapshot.Info.Value > snapshot.Info.LowerBound { - softOverCount++ - } - if snapshot.Info.Value > snapshot.Info.UpperBound { - hardOverCount++ - } - } - } - return -} - -func CreateMetricRing(size int) *MetricRing { - return &MetricRing{ - MaxLen: size, - Queue: make([]*MetricSnapshot, size), - CurrentIndex: -1, - } -} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go index a364379e0c..91bd61481c 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go @@ -825,7 +825,7 @@ func TestCPUPressureLoadEviction_collectMetrics(t *testing.T) { enableReclaim bool podEntries qrmstate.PodEntries loads map[string]map[string]float64 - wantSharedPoolSnapshots MetricInfo + wantSharedPoolSnapshots utilmetric.MetricInfo }{ { name: "use default bound, without dedicated core pod", @@ -986,7 +986,7 @@ func TestCPUPressureLoadEviction_collectMetrics(t *testing.T) { testName: 8, }, }, - wantSharedPoolSnapshots: MetricInfo{ + wantSharedPoolSnapshots: utilmetric.MetricInfo{ Name: consts.MetricLoad1MinContainer, Value: 2.4, UpperBound: 18, @@ -1177,7 +1177,7 @@ func TestCPUPressureLoadEviction_collectMetrics(t *testing.T) { testName: 8, }, }, - wantSharedPoolSnapshots: MetricInfo{ + wantSharedPoolSnapshots: utilmetric.MetricInfo{ Name: consts.MetricLoad1MinContainer, Value: 3.4, UpperBound: 8 * 1.8, @@ -1369,7 +1369,7 @@ func TestCPUPressureLoadEviction_collectMetrics(t *testing.T) { testName: 8, }, }, - 
wantSharedPoolSnapshots: MetricInfo{ + wantSharedPoolSnapshots: utilmetric.MetricInfo{ Name: consts.MetricLoad1MinContainer, Value: 3.4, LowerBound: 1, @@ -1561,7 +1561,7 @@ func TestCPUPressureLoadEviction_collectMetrics(t *testing.T) { testName: 8, }, }, - wantSharedPoolSnapshots: MetricInfo{ + wantSharedPoolSnapshots: utilmetric.MetricInfo{ Name: consts.MetricLoad1MinContainer, Value: 3.4, LowerBound: 4.4, diff --git a/pkg/config/agent/dynamic/adminqos/eviction/cpu_system_eviction.go b/pkg/config/agent/dynamic/adminqos/eviction/cpu_system_eviction.go new file mode 100644 index 0000000000..da4c8a5b1d --- /dev/null +++ b/pkg/config/agent/dynamic/adminqos/eviction/cpu_system_eviction.go @@ -0,0 +1,98 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + "time" + + "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd" +) + +type CPUSystemPressureEvictionPluginConfiguration struct { + EnableCPUSystemEviction bool + SystemLoadUpperBoundRatio float64 + SystemLoadLowerBoundRatio float64 + SystemUsageUpperBoundRatio float64 + SystemUsageLowerBoundRatio float64 + ThresholdMetPercentage float64 + MetricRingSize int + EvictionCoolDownTime time.Duration + EvictionRankingMetrics []string + GracePeriod int64 + CheckCPUManager bool + RankingLabels map[string][]string +} + +func NewCPUSystemPressureEvictionPluginConfiguration() *CPUSystemPressureEvictionPluginConfiguration { + return &CPUSystemPressureEvictionPluginConfiguration{} +} + +func (c *CPUSystemPressureEvictionPluginConfiguration) ApplyConfiguration(conf *crd.DynamicConfigCRD) { + if aqc := conf.AdminQoSConfiguration; aqc != nil && + aqc.Spec.Config.EvictionConfig != nil && + aqc.Spec.Config.EvictionConfig.CPUSystemPressureEvictionConfig != nil { + + config := aqc.Spec.Config.EvictionConfig.CPUSystemPressureEvictionConfig + if config.EnableCPUSystemPressureEviction != nil { + c.EnableCPUSystemEviction = *(config.EnableCPUSystemPressureEviction) + } + + if config.LoadUpperBoundRatio != nil { + c.SystemLoadUpperBoundRatio = *(config.LoadUpperBoundRatio) + } + + if config.LoadLowerBoundRatio != nil { + c.SystemLoadLowerBoundRatio = *(config.LoadLowerBoundRatio) + } + + if config.UsageUpperBoundRatio != nil { + c.SystemUsageUpperBoundRatio = *(config.UsageUpperBoundRatio) + } + + if config.UsageLowerBoundRatio != nil { + c.SystemUsageLowerBoundRatio = *(config.UsageLowerBoundRatio) + } + + if config.ThresholdMetPercentage != nil { + c.ThresholdMetPercentage = *(config.ThresholdMetPercentage) + } + + if config.MetricRingSize != nil { + c.MetricRingSize = *(config.MetricRingSize) + } + + if config.EvictionCoolDownTime != nil { + c.EvictionCoolDownTime = config.EvictionCoolDownTime.Duration + } + + if config.EvictionRankingMetrics != nil { + c.EvictionRankingMetrics = config.EvictionRankingMetrics + } + + if config.GracePeriod != nil { + c.GracePeriod = *(config.GracePeriod) + } + + if config.CheckCPUManager != nil { + c.CheckCPUManager = *(config.CheckCPUManager) + } + + if 
config.RankingLabels != nil { + c.RankingLabels = config.RankingLabels + } + } +} diff --git a/pkg/config/agent/dynamic/adminqos/eviction/eviction_base.go b/pkg/config/agent/dynamic/adminqos/eviction/eviction_base.go index 2bcfe1324f..43709733da 100644 --- a/pkg/config/agent/dynamic/adminqos/eviction/eviction_base.go +++ b/pkg/config/agent/dynamic/adminqos/eviction/eviction_base.go @@ -30,15 +30,17 @@ type EvictionConfiguration struct { *RootfsPressureEvictionConfiguration *ReclaimedResourcesEvictionConfiguration *SystemLoadEvictionPluginConfiguration + *CPUSystemPressureEvictionPluginConfiguration } func NewEvictionConfiguration() *EvictionConfiguration { return &EvictionConfiguration{ - CPUPressureEvictionConfiguration: NewCPUPressureEvictionConfiguration(), - MemoryPressureEvictionConfiguration: NewMemoryPressureEvictionPluginConfiguration(), - RootfsPressureEvictionConfiguration: NewRootfsPressureEvictionPluginConfiguration(), - ReclaimedResourcesEvictionConfiguration: NewReclaimedResourcesEvictionConfiguration(), - SystemLoadEvictionPluginConfiguration: NewSystemLoadEvictionPluginConfiguration(), + CPUPressureEvictionConfiguration: NewCPUPressureEvictionConfiguration(), + MemoryPressureEvictionConfiguration: NewMemoryPressureEvictionPluginConfiguration(), + RootfsPressureEvictionConfiguration: NewRootfsPressureEvictionPluginConfiguration(), + ReclaimedResourcesEvictionConfiguration: NewReclaimedResourcesEvictionConfiguration(), + SystemLoadEvictionPluginConfiguration: NewSystemLoadEvictionPluginConfiguration(), + CPUSystemPressureEvictionPluginConfiguration: NewCPUSystemPressureEvictionPluginConfiguration(), } } @@ -53,4 +55,5 @@ func (c *EvictionConfiguration) ApplyConfiguration(conf *crd.DynamicConfigCRD) { c.RootfsPressureEvictionConfiguration.ApplyTo(conf) c.ReclaimedResourcesEvictionConfiguration.ApplyConfiguration(conf) c.SystemLoadEvictionPluginConfiguration.ApplyConfiguration(conf) + c.CPUSystemPressureEvictionPluginConfiguration.ApplyConfiguration(conf) } diff --git a/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go b/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go index 29e61df4b2..6d45265076 100644 --- a/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go +++ b/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go @@ -24,8 +24,10 @@ import ( // Fake metrics are not fetched from meta-server const ( - FakeMetricQoSLevel = "qos.pod" - FakeMetricPriority = "priority.pod" + FakeMetricQoSLevel = "qos.pod" + FakeMetricPriority = "priority.pod" + FakeMetricNativeQoSLevel = "native.qos.pod" + FakeMetricOwnerLevel = "owner.pod" ) const ( diff --git a/pkg/util/metric/ring.go b/pkg/util/metric/ring.go new file mode 100644 index 0000000000..ed6a558071 --- /dev/null +++ b/pkg/util/metric/ring.go @@ -0,0 +1,96 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metric + +import "sync" + +type MetricInfo struct { + Name string + Value float64 + UpperBound float64 + LowerBound float64 +} + +type MetricSnapshot struct { + Info MetricInfo + Time int64 +} + +type MetricRing struct { + MaxLen int + Queue []*MetricSnapshot + CurrentIndex int + + sync.RWMutex +} + +func (ring *MetricRing) Sum() float64 { + ring.RLock() + defer ring.RUnlock() + + sum := 0.0 + for _, snapshot := range ring.Queue { + if snapshot != nil { + sum += snapshot.Info.Value + } + } + return sum +} + +func (ring *MetricRing) Push(snapShot *MetricSnapshot) { + ring.Lock() + defer ring.Unlock() + + if ring.CurrentIndex != -1 && snapShot != nil { + latestSnapShot := ring.Queue[ring.CurrentIndex] + if latestSnapShot != nil && latestSnapShot.Time == snapShot.Time { + return + } + } + + ring.CurrentIndex = (ring.CurrentIndex + 1) % ring.MaxLen + ring.Queue[ring.CurrentIndex] = snapShot +} + +func (ring *MetricRing) Count() (softOverCount, hardOverCount int) { + ring.RLock() + defer ring.RUnlock() + + for _, snapshot := range ring.Queue { + if snapshot == nil { + continue + } + + if snapshot.Info.LowerBound > 0 { + if snapshot.Info.Value > snapshot.Info.LowerBound { + softOverCount++ + } + if snapshot.Info.Value > snapshot.Info.UpperBound { + hardOverCount++ + } + } + } + return +} + +func CreateMetricRing(size int) *MetricRing { + return &MetricRing{ + MaxLen: size, + Queue: make([]*MetricSnapshot, size), + CurrentIndex: -1, + } +} diff --git a/pkg/util/native/pod_resource.go b/pkg/util/native/pod_resource.go index 4b2775f8c5..4e325fe7df 100644 --- a/pkg/util/native/pod_resource.go +++ b/pkg/util/native/pod_resource.go @@ -19,6 +19,7 @@ package native import ( v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" ) // PodResource key: namespace/name, value: pod requested ResourceList @@ -70,3 +71,42 @@ func CalculateResource(pod *v1.Pod) v1.ResourceList { } return resources } + +func PodGuaranteedCPUs(pod *v1.Pod) int { + // The maximum of requested CPUs by init containers. + requestedByInitContainers := 0 + for _, container := range pod.Spec.InitContainers { + if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok { + continue + } + requestedCPU := guaranteedCPUs(pod, &container) + if requestedCPU > requestedByInitContainers { + requestedByInitContainers = requestedCPU + } + } + // The sum of requested CPUs by app containers. 
+ requestedByAppContainers := 0 + for _, container := range pod.Spec.Containers { + if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok { + continue + } + requestedByAppContainers += guaranteedCPUs(pod, &container) + } + + if requestedByInitContainers > requestedByAppContainers { + return requestedByInitContainers + } + return requestedByAppContainers +} + +func guaranteedCPUs(pod *v1.Pod, container *v1.Container) int { + if qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { + return 0 + } + cpuQuantity := container.Resources.Requests[v1.ResourceCPU] + if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { + return 0 + } + + return int(cpuQuantity.Value()) +} diff --git a/pkg/util/native/pod_sorter.go b/pkg/util/native/pod_sorter.go index 910c1f9c87..3ab8192319 100644 --- a/pkg/util/native/pod_sorter.go +++ b/pkg/util/native/pod_sorter.go @@ -18,8 +18,8 @@ package native import ( v1 "k8s.io/api/core/v1" - corev1helpers "k8s.io/component-helpers/scheduling/corev1" + "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" "github.com/kubewharf/katalyst-core/pkg/util/general" ) @@ -75,6 +75,52 @@ func PodUniqKeyCmpFunc(i1, i2 interface{}) int { return general.CmpString(p1UniqKey, p2UniqKey) } +// PodQoSCmpFunc compares two pods by QoS class, ordering Guaranteed > Burstable > BestEffort +func PodQoSCmpFunc(i1, i2 interface{}) int { + p1QoSClass := qos.GetPodQOS(i1.(*v1.Pod)) + p2QoSClass := qos.GetPodQOS(i2.(*v1.Pod)) + + if p1QoSClass == p2QoSClass { + return 0 + } + + if p1QoSClass == v1.PodQOSGuaranteed { + return 1 + } + if p1QoSClass == v1.PodQOSBurstable { + if p2QoSClass == v1.PodQOSGuaranteed { + return -1 + } + return 1 + } + return -1 +} + +// PodOwnerCmpFunc compares two pods by the kind of their first owner reference, ordering DaemonSet-owned pods above all others +func PodOwnerCmpFunc(i1, i2 interface{}) int { + getOwnerKind := func(pod *v1.Pod) string { + if len(pod.OwnerReferences) <= 0 { + return "" + } + return pod.OwnerReferences[0].Kind + } + + p1Owner := getOwnerKind(i1.(*v1.Pod)) + p2Owner := getOwnerKind(i2.(*v1.Pod)) + if p1Owner == p2Owner { + return 0 + } + if p1Owner == "DaemonSet" { + return 1 + } + if p2Owner == "DaemonSet" { + return -1 + } + return 0 +} + var _ general.CmpFunc = PodPriorityCmpFunc var _ general.CmpFunc = PodCPURequestCmpFunc var _ general.CmpFunc = PodUniqKeyCmpFunc +var _ general.CmpFunc = PodQoSCmpFunc +var _ general.CmpFunc = PodOwnerCmpFunc
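
Usage sketch for the relocated ring buffer (pkg/util/metric/ring.go): Push stores one timestamped sample per collection cycle and drops a snapshot whose Time matches the latest slot, while Count reports how many cached samples sit above the lower (soft) and upper (hard) bounds, which callers then weigh against a threshold-met percentage. The bounds, sample values, and the >=-threshold check below are invented for illustration and are not copied from the eviction plugins.

package main

import (
	"fmt"

	"github.com/kubewharf/katalyst-core/pkg/util/metric"
)

func main() {
	ring := metric.CreateMetricRing(10) // ring size chosen arbitrarily for this sketch

	for i := 0; i < 10; i++ {
		ring.Push(&metric.MetricSnapshot{
			Info: metric.MetricInfo{
				Name:       "cpu.load.1min",
				Value:      float64(80 + i), // hypothetical per-cycle load samples
				LowerBound: 64,              // e.g. node capacity * a lower-bound ratio
				UpperBound: 128,             // e.g. node capacity * an upper-bound ratio
			},
			Time: int64(i), // Push ignores a snapshot whose Time equals the latest slot's
		})
	}

	softOver, hardOver := ring.Count()
	thresholdMetPercentage := 0.8 // assumed threshold for this sketch

	fmt.Printf("soft over: %d, hard over: %d\n", softOver, hardOver)                         // 10, 0
	fmt.Println("taint?", float64(softOver)/float64(ring.MaxLen) >= thresholdMetPercentage)  // true
	fmt.Println("evict?", float64(hardOver)/float64(ring.MaxLen) >= thresholdMetPercentage)  // false
}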
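
Worked sketch for PodGuaranteedCPUs (pkg/util/native/pod_resource.go): it mirrors kubelet CPU-manager accounting by counting only Guaranteed pods with whole-CPU requests, taking the maximum over init containers and the sum over app containers, and returning the larger of the two. The pod spec and the guaranteedResources helper below are invented for illustration.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/kubewharf/katalyst-core/pkg/util/native"
)

// guaranteedResources is a hypothetical helper for this sketch: requests == limits
// for cpu and memory, so the pod is classified as Guaranteed.
func guaranteedResources(cpu, memory string) v1.ResourceRequirements {
	rl := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse(cpu),
		v1.ResourceMemory: resource.MustParse(memory),
	}
	return v1.ResourceRequirements{Requests: rl, Limits: rl}
}

func main() {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "demo"},
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{
				{Name: "init", Resources: guaranteedResources("4", "1Gi")},
			},
			Containers: []v1.Container{
				{Name: "app-a", Resources: guaranteedResources("1", "1Gi")},
				{Name: "app-b", Resources: guaranteedResources("2", "1Gi")},
			},
		},
	}

	// init containers contribute their maximum (4), app containers their sum (1+2=3),
	// and the larger value wins; a fractional request such as "1500m" would make that
	// container contribute 0, since only whole-CPU requests are counted
	fmt.Println(native.PodGuaranteedCPUs(pod)) // 4
}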
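
Behavior sketch for the new comparators in pkg/util/native/pod_sorter.go: they back the two new fake ranking metrics, native.qos.pod (PodQoSCmpFunc) and owner.pod (PodOwnerCmpFunc), that getEvictionCmpFuncs can now chain when ranking candidate pods. The sketch only demonstrates the returned signs; how the surrounding sorter turns those signs into an eviction order is decided by the plugin code, which is not shown in this diff. Pod specs are invented for illustration.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/kubewharf/katalyst-core/pkg/util/native"
)

func main() {
	// Guaranteed pod: requests == limits for both cpu and memory
	guaranteedRL := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("1"),
		v1.ResourceMemory: resource.MustParse("1Gi"),
	}
	guaranteed := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "guaranteed"},
		Spec: v1.PodSpec{Containers: []v1.Container{{
			Name:      "c",
			Resources: v1.ResourceRequirements{Requests: guaranteedRL, Limits: guaranteedRL},
		}}},
	}

	// BestEffort pods (no resource requests), one owned by a DaemonSet, one by a ReplicaSet
	daemonSetPod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name:            "ds-pod",
		OwnerReferences: []metav1.OwnerReference{{Kind: "DaemonSet", Name: "ds"}},
	}}
	replicaSetPod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name:            "rs-pod",
		OwnerReferences: []metav1.OwnerReference{{Kind: "ReplicaSet", Name: "rs"}},
	}}

	fmt.Println(native.PodQoSCmpFunc(guaranteed, daemonSetPod))      // 1: Guaranteed orders above BestEffort
	fmt.Println(native.PodQoSCmpFunc(daemonSetPod, replicaSetPod))   // 0: same QoS class, so a later ranking metric breaks the tie
	fmt.Println(native.PodOwnerCmpFunc(daemonSetPod, replicaSetPod)) // 1: DaemonSet-owned pod orders above others
	fmt.Println(native.PodOwnerCmpFunc(replicaSetPod, daemonSetPod)) // -1
}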