Skip to content

Commit

Permalink
feat(eviction): add cpu system pressure eviction plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
WangZzzhe committed Mar 21, 2024
1 parent 3b8b019 commit 567b8d9
Show file tree
Hide file tree
Showing 17 changed files with 2,069 additions and 104 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
"encoding/json"
"time"

cliflag "k8s.io/component-base/cli/flag"

"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
)

// Default values used by NewCPUSystemPressureEvictionOptions.
const (
	// Feature switches: both are off unless explicitly enabled.
	defaultEnableCPUSystemEviction = false
	defaultCheckCPUManager         = false

	// Load bounds, expressed as ratios that are multiplied by node capacity.
	defaultSystemLoadLowerBoundRatio = 1
	defaultSystemLoadUpperBoundRatio = 2

	// CPU usage bounds, expressed as ratios that are multiplied by node capacity.
	defaultSystemUsageLowerBoundRatio = 0.6
	defaultSystemUsageUpperBoundRatio = 0.8

	// Metric aggregation: size of the metric ring and the share of its
	// samples that has to exceed a bound before any action is taken.
	defaultMetricRingSize         = 10
	defaultThresholdMetPercentage = 0.8

	// Eviction pacing.
	defaultEvictionCoolDownTime                 = 5 * time.Minute
	defaultCPUSystemPressureEvictionGracePeriod = -1
)

// defaultEvictionRankingMetrics lists the metrics used by default to rank
// active pods when picking eviction candidates.
var defaultEvictionRankingMetrics = []string{
	"qos.pod",
	"native.qos.pod",
	"priority.pod",
}

// CPUSystemPressureEvictionOptions holds the command-line options of the cpu
// system pressure eviction plugin. AddFlags binds each field to a flag and
// ApplyTo copies the values into the plugin configuration.
type CPUSystemPressureEvictionOptions struct {
// EnableCPUSystemEviction enables cpu system eviction when set to true.
EnableCPUSystemEviction bool
// SystemLoadUpperBoundRatio is multiplied by node capacity to get the load
// upper bound; repeatedly exceeding that bound triggers eviction.
SystemLoadUpperBoundRatio float64
// SystemLoadLowerBoundRatio is multiplied by node capacity to get the load
// lower bound; repeatedly exceeding that bound triggers a node taint.
SystemLoadLowerBoundRatio float64
// SystemUsageUpperBoundRatio is multiplied by node capacity to get the cpu
// usage upper bound; repeatedly exceeding that bound triggers eviction.
SystemUsageUpperBoundRatio float64
// SystemUsageLowerBoundRatio is multiplied by node capacity to get the cpu
// usage lower bound; repeatedly exceeding that bound triggers a node taint.
SystemUsageLowerBoundRatio float64
// ThresholdMetPercentage is the share of samples in the metric ring that
// must be over a bound before eviction or node taint is triggered.
ThresholdMetPercentage float64
// MetricRingSize is the size of the ring used to cache and aggregate node
// metrics.
MetricRingSize int
// EvictionCoolDownTime is how long cpu system eviction stays disabled after
// it has been triggered.
EvictionCoolDownTime time.Duration
// EvictionRankingMetrics are the metrics used to rank active pods when
// selecting the top eviction candidates.
EvictionRankingMetrics []string
// GracePeriod is the grace period applied when evicting a pod.
// NOTE(review): unit and the meaning of the -1 default are not visible
// here — confirm against the plugin implementation.
GracePeriod int64
// CheckCPUManager makes the plugin check the kubelet CPUManager policy; if
// CPUManager is on, guaranteed pods are filtered out when collecting
// metrics and evicting pods.
CheckCPUManager bool
// RankingLabels are custom ranking labels; later values in each array have
// a higher eviction precedence.
RankingLabels StringToSlice
}

// NewCPUSystemPressureEvictionOptions returns a CPUSystemPressureEvictionOptions
// populated with the package defaults.
//
// The returned value owns its EvictionRankingMetrics slice and RankingLabels
// map, so callers may modify them freely without affecting the package-level
// defaults or options created by other calls.
func NewCPUSystemPressureEvictionOptions() *CPUSystemPressureEvictionOptions {
	return &CPUSystemPressureEvictionOptions{
		EnableCPUSystemEviction:    defaultEnableCPUSystemEviction,
		SystemLoadUpperBoundRatio:  defaultSystemLoadUpperBoundRatio,
		SystemLoadLowerBoundRatio:  defaultSystemLoadLowerBoundRatio,
		SystemUsageUpperBoundRatio: defaultSystemUsageUpperBoundRatio,
		SystemUsageLowerBoundRatio: defaultSystemUsageLowerBoundRatio,
		ThresholdMetPercentage:     defaultThresholdMetPercentage,
		MetricRingSize:             defaultMetricRingSize,
		EvictionCoolDownTime:       defaultEvictionCoolDownTime,
		// Copy the default slice instead of sharing its backing array: the
		// slice is handed on to the plugin configuration by ApplyTo, and an
		// in-place change there (e.g. sorting) must not alter the
		// package-level default seen by later callers.
		EvictionRankingMetrics: append([]string(nil), defaultEvictionRankingMetrics...),
		GracePeriod:            defaultCPUSystemPressureEvictionGracePeriod,
		CheckCPUManager:        defaultCheckCPUManager,
		RankingLabels:          map[string][]string{},
	}
}

// AddFlags registers the cpu system pressure eviction flags in the
// "eviction-cpu-system" flag set of fss. Every flag is bound to the matching
// field of o, and the field's current value is used as the flag default.
func (o *CPUSystemPressureEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
	fs := fss.FlagSet("eviction-cpu-system")

	fs.BoolVar(&o.EnableCPUSystemEviction, "eviction-cpu-system-enable", o.EnableCPUSystemEviction,
		"set true to enable cpu system eviction")

	// Load thresholds.
	fs.Float64Var(&o.SystemLoadUpperBoundRatio, "eviction-cpu-system-load-upper-bound-ratio", o.SystemLoadUpperBoundRatio,
		"multiply node capacity by this ratio to get the load upper bound. "+
			"if the load of the node is greater than the load upper bound repeatedly, the eviction will be triggered. "+
			"default 2.0")
	fs.Float64Var(&o.SystemLoadLowerBoundRatio, "eviction-cpu-system-load-lower-bound-ratio", o.SystemLoadLowerBoundRatio,
		"multiply node capacity by this ratio to get the load lower bound. "+
			"if the load of the node is greater than the load lower bound repeatedly, node taint will be triggered. "+
			"default 1.0")

	// CPU usage thresholds.
	fs.Float64Var(&o.SystemUsageUpperBoundRatio, "eviction-cpu-system-usage-upper-bound-ratio", o.SystemUsageUpperBoundRatio,
		"multiply node capacity by this ratio to get the usage upper bound. "+
			"if the cpu usage of the node is greater than the usage upper bound repeatedly, the eviction will be triggered. "+
			"default 0.8")
	fs.Float64Var(&o.SystemUsageLowerBoundRatio, "eviction-cpu-system-usage-lower-bound-ratio", o.SystemUsageLowerBoundRatio,
		"multiply node capacity by this ratio to get the usage lower bound. "+
			"if the cpu usage of the node is greater than the usage lower bound repeatedly, node taint will be triggered. "+
			"default 0.6")

	// Metric aggregation.
	fs.Float64Var(&o.ThresholdMetPercentage, "eviction-cpu-system-threshold-met-percentage", o.ThresholdMetPercentage,
		"if the ratio between the number of times the metric value exceeds the bound value and the metric ring size "+
			"is greater than this percentage, the eviction or node taint will be triggered, default 0.8")
	fs.IntVar(&o.MetricRingSize, "eviction-cpu-system-metric-ring-size", o.MetricRingSize,
		"the size of the metric ring, which is used to cache and aggregate the metrics of the node, default 10")

	// Eviction behaviour.
	fs.DurationVar(&o.EvictionCoolDownTime, "eviction-cpu-system-cool-down-time", o.EvictionCoolDownTime,
		"the cool-down time of cpu system eviction, if the cpu system eviction is triggered, "+
			"the cpu system eviction will be disabled for the cool-down time")
	fs.StringSliceVar(&o.EvictionRankingMetrics, "eviction-cpu-system-ranking-metrics", o.EvictionRankingMetrics,
		"metrics for ranking active pods when GetTopEvictionPods")
	fs.Int64Var(&o.GracePeriod, "eviction-cpu-system-grace-period", o.GracePeriod,
		"grace period when evicting pod")
	fs.BoolVar(&o.CheckCPUManager, "eviction-cpu-system-check-cpumanager", o.CheckCPUManager,
		"set true to check kubelet CPUManager policy, if CPUManager is on, guaranteed pods will be filtered when collecting metrics and evicting pods")
	// RankingLabels is parsed by StringToSlice.Set, which expects JSON.
	fs.Var(&o.RankingLabels, "eviction-cpu-system-ranking-labels",
		"custom ranking labels as a JSON object mapping a label key to an array of label values, "+
			"the later label values in the array have a higher eviction precedence")
}

// ApplyTo copies every option held by o into the plugin configuration c.
// It performs no validation and always returns nil; c must be non-nil.
func (o *CPUSystemPressureEvictionOptions) ApplyTo(c *eviction.CPUSystemPressureEvictionPluginConfiguration) error {
	// Feature switches.
	c.EnableCPUSystemEviction = o.EnableCPUSystemEviction
	c.CheckCPUManager = o.CheckCPUManager

	// Load and usage bounds.
	c.SystemLoadLowerBoundRatio = o.SystemLoadLowerBoundRatio
	c.SystemLoadUpperBoundRatio = o.SystemLoadUpperBoundRatio
	c.SystemUsageLowerBoundRatio = o.SystemUsageLowerBoundRatio
	c.SystemUsageUpperBoundRatio = o.SystemUsageUpperBoundRatio

	// Metric aggregation.
	c.MetricRingSize = o.MetricRingSize
	c.ThresholdMetPercentage = o.ThresholdMetPercentage

	// Eviction pacing and pod ranking.
	c.EvictionCoolDownTime = o.EvictionCoolDownTime
	c.GracePeriod = o.GracePeriod
	c.EvictionRankingMetrics = o.EvictionRankingMetrics
	c.RankingLabels = o.RankingLabels

	return nil
}

// StringToSlice maps a string key to a list of string values. Its pointer
// type provides String, Set and Type so that it can be registered as a
// command-line flag value whose textual form is JSON.
type StringToSlice map[string][]string

// String returns the JSON encoding of the map, or the empty string when the
// map cannot be encoded.
func (s *StringToSlice) String() string {
	if encoded, err := json.Marshal(s); err == nil {
		return string(encoded)
	}
	return ""
}

// Set decodes value as JSON into the map and returns the decoding error, if
// any.
func (s *StringToSlice) Set(value string) error {
	return json.Unmarshal([]byte(value), s)
}

// Type returns the name of the flag value type.
func (s *StringToSlice) Type() string {
	const typeName = "stringToSlice"
	return typeName
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type EvictionOptions struct {
*ReclaimedResourcesEvictionOptions
*SystemLoadPressureEvictionOptions
*RootfsPressureEvictionOptions
*CPUSystemPressureEvictionOptions
}

func NewEvictionOptions() *EvictionOptions {
Expand All @@ -42,6 +43,7 @@ func NewEvictionOptions() *EvictionOptions {
ReclaimedResourcesEvictionOptions: NewReclaimedResourcesEvictionOptions(),
SystemLoadPressureEvictionOptions: NewSystemLoadPressureEvictionOptions(),
RootfsPressureEvictionOptions: NewRootfsPressureEvictionOptions(),
CPUSystemPressureEvictionOptions: NewCPUSystemPressureEvictionOptions(),
}
}

Expand All @@ -55,6 +57,7 @@ func (o *EvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.ReclaimedResourcesEvictionOptions.AddFlags(fss)
o.SystemLoadPressureEvictionOptions.AddFlags(fss)
o.RootfsPressureEvictionOptions.AddFlags(fss)
o.CPUSystemPressureEvictionOptions.AddFlags(fss)
}

func (o *EvictionOptions) ApplyTo(c *eviction.EvictionConfiguration) error {
Expand All @@ -65,5 +68,6 @@ func (o *EvictionOptions) ApplyTo(c *eviction.EvictionConfiguration) error {
errList = append(errList, o.ReclaimedResourcesEvictionOptions.ApplyTo(c.ReclaimedResourcesEvictionConfiguration))
errList = append(errList, o.SystemLoadPressureEvictionOptions.ApplyTo(c.SystemLoadEvictionPluginConfiguration))
errList = append(errList, o.RootfsPressureEvictionOptions.ApplyTo(c.RootfsPressureEvictionConfiguration))
errList = append(errList, o.CPUSystemPressureEvictionOptions.ApplyTo(c.CPUSystemPressureEvictionPluginConfiguration))
return errors.NewAggregate(errList)
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ require (
)

replace (
github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240321082255-89348b7117ac
k8s.io/api => k8s.io/api v0.24.6
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
k8s.io/apimachinery => k8s.io/apimachinery v0.24.6
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
github.com/WangZzzhe/katalyst-api v0.0.0-20240321082255-89348b7117ac h1:rt3Cy0yd/2f6I9+4rRZQkKV0V98txao7fZ8fYVOP3FA=
github.com/WangZzzhe/katalyst-api v0.0.0-20240321082255-89348b7117ac/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c=
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
Expand Down Expand Up @@ -554,8 +556,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.4.1-0.20240315044944-45cdd48ceedc h1:KD5OnzzF1B44TpV2W+nTqCghwW7jlCqjfCZ94z6QWLg=
github.com/kubewharf/katalyst-api v0.4.1-0.20240315044944-45cdd48ceedc/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4=
github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
Expand Down
2 changes: 2 additions & 0 deletions pkg/agent/evictionmanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
endpointpkg "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/endpoint"
"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin"
"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/cpu"
"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/memory"
"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/resource"
"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin/rootfs"
Expand Down Expand Up @@ -123,6 +124,7 @@ func NewInnerEvictionPluginInitializers() map[string]plugin.InitFunc {
innerEvictionPluginInitializers[memory.EvictionPluginNameSystemMemoryPressure] = memory.NewSystemPressureEvictionPlugin
innerEvictionPluginInitializers[memory.EvictionPluginNameRssOveruse] = memory.NewRssOveruseEvictionPlugin
innerEvictionPluginInitializers[rootfs.EvictionPluginNamePodRootfsPressure] = rootfs.NewPodRootfsPressureEvictionPlugin
innerEvictionPluginInitializers[cpu.EvictionPluginNameSystemCPUPressure] = cpu.NewCPUSystemPressureEvictionPlugin
return innerEvictionPluginInitializers
}

Expand Down
Loading

0 comments on commit 567b8d9

Please sign in to comment.