diff --git a/apis/slo/v1alpha1/pod.go b/apis/slo/v1alpha1/pod.go index fe10319aa..501779127 100644 --- a/apis/slo/v1alpha1/pod.go +++ b/apis/slo/v1alpha1/pod.go @@ -67,12 +67,12 @@ func GetPodMemoryQoSConfig(pod *corev1.Pod) (*PodMemoryQOSConfig, error) { } const ( - // AnnotationCoreSchedGroupID is the annotation key of the group ID of the Linux Core Scheduling. + // LabelCoreSchedGroupID is the label key of the group ID of the Linux Core Scheduling. // Value should be a valid UUID or the none value "0". // When the value is a valid UUID, pods with that group ID and the equal CoreExpelled status on the node will be // assigned to the same core sched cookie. // When the value is the none value "0", pod will be reset to the default core sched cookie `0`. - // When the annotation is missing but the node-level strategy enables the core sched, the pod will be assigned an + // When the k-v pair is missing but the node-level strategy enables the core sched, the pod will be assigned an // internal group according to the pod's UID. // // Core Sched: https://docs.kernel.org/admin-guide/hw-vuln/core-scheduling.html @@ -83,20 +83,20 @@ const ( // enables the individual cookie from pods of other QoS classes via adding a suffix for the group ID. So the pods // of different QoS will take different cookies when their CoreExpelled status are diverse even if their group ID // are the same. - AnnotationCoreSchedGroupID = apiext.DomainPrefix + "core-sched-group-id" + LabelCoreSchedGroupID = apiext.DomainPrefix + "core-sched-group-id" // CoreSchedGroupIDNone is the none value of the core sched group ID which indicates the core sched is disabled for // the pod. The pod will be reset to the system-default cookie `0`. CoreSchedGroupIDNone = "0" ) -// GetCoreSchedGroupID gets the core sched group ID from the pod annotations. +// GetCoreSchedGroupID gets the core sched group ID from the pod labels. // It returns the core sched group ID and whether the pod explicitly disables the core sched. 
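A caller-side sketch of the label semantics documented above, assuming it sits in this package; exampleInterpretCoreSchedLabel is a hypothetical name, and only GetCoreSchedGroupID, CoreSchedGroupIDNone and the documented behaviour come from this file.

func exampleInterpretCoreSchedLabel(labels map[string]string) {
	groupID, disabled := GetCoreSchedGroupID(labels)
	switch {
	case disabled == nil:
		// label missing: the node-level strategy decides, possibly assigning an
		// internal group derived from the pod UID
	case *disabled:
		// label is the none value "0": the pod is reset to the default cookie `0`
	default:
		// label is a valid UUID: pods with this group ID and the equal CoreExpelled
		// status on the node share the same core sched cookie
		_ = groupID
	}
}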
-func GetCoreSchedGroupID(annotations map[string]string) (string, *bool) { - if annotations == nil { +func GetCoreSchedGroupID(labels map[string]string) (string, *bool) { + if labels == nil { return "", nil } - value, ok := annotations[AnnotationCoreSchedGroupID] + value, ok := labels[LabelCoreSchedGroupID] if !ok { return "", nil } diff --git a/go.mod b/go.mod index fde0f41b1..4818c1505 100644 --- a/go.mod +++ b/go.mod @@ -35,9 +35,9 @@ require ( github.com/stretchr/testify v1.8.2 go.uber.org/atomic v1.10.0 go.uber.org/multierr v1.6.0 - golang.org/x/crypto v0.11.0 - golang.org/x/net v0.12.0 - golang.org/x/sys v0.10.0 + golang.org/x/crypto v0.14.0 + golang.org/x/net v0.16.0 + golang.org/x/sys v0.13.0 golang.org/x/time v0.0.0-20220920022843-2ce7c2934d45 google.golang.org/grpc v1.51.0 google.golang.org/protobuf v1.28.1 @@ -204,12 +204,12 @@ require ( go.opentelemetry.io/proto/otlp v0.19.0 // indirect go.uber.org/goleak v1.2.0 // indirect go.uber.org/zap v1.19.1 // indirect - golang.org/x/mod v0.12.0 // indirect + golang.org/x/mod v0.13.0 // indirect golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 // indirect - golang.org/x/sync v0.3.0 // indirect - golang.org/x/term v0.10.0 // indirect - golang.org/x/text v0.11.0 // indirect - golang.org/x/tools v0.11.0 // indirect + golang.org/x/sync v0.4.0 // indirect + golang.org/x/term v0.13.0 // indirect + golang.org/x/text v0.13.0 // indirect + golang.org/x/tools v0.14.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect google.golang.org/api v0.96.0 // indirect diff --git a/go.sum b/go.sum index 47949af05..6110aa122 100644 --- a/go.sum +++ b/go.sum @@ -1335,8 +1335,8 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= +golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1404,8 +1404,8 @@ golang.org/x/net v0.0.0-20220909164309-bea034e7d591/go.mod h1:YDH+HFinaLZZlnHAfS golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= -golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos= +golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod 
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -1448,8 +1448,8 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= +golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1570,14 +1570,14 @@ golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= -golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= -golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= +golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1589,8 +1589,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= -golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.3.0 
h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -1663,8 +1663,8 @@ golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.11.0 h1:EMCa6U9S2LtZXLAMoWiR/R8dAQFRqbAitmbJ2UKhoi8= -golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= +golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= +golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/koordlet/metrics/core_sched.go b/pkg/koordlet/metrics/core_sched.go new file mode 100644 index 000000000..c541440dc --- /dev/null +++ b/pkg/koordlet/metrics/core_sched.go @@ -0,0 +1,92 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "strconv" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/koordinator-sh/koordinator/pkg/util/metrics" +) + +const ( + CoreSchedCookieKey = "core_sched_cookie" + CoreSchedGroupKey = "core_sched_group" +) + +var ( + ContainerCoreSchedCookie = metrics.NewGCGaugeVec("container_core_sched_cookie", prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: KoordletSubsystem, + Name: "container_core_sched_cookie", + Help: "the core scheduling cookie of the container", + }, []string{NodeKey, PodName, PodNamespace, PodUID, ContainerName, ContainerID, CoreSchedGroupKey, CoreSchedCookieKey})) + + CoreSchedCookieManageStatus = metrics.NewGCCounterVec("core_sched_cookie_manage_status", prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: KoordletSubsystem, + Name: "core_sched_cookie_manage_status", + Help: "the manage status of the core scheduling cookie", + }, []string{NodeKey, CoreSchedGroupKey, StatusKey})) + + CoreSchedCollector = []prometheus.Collector{ + ContainerCoreSchedCookie.GetGaugeVec(), + CoreSchedCookieManageStatus.GetCounterVec(), + } +) + +func RecordContainerCoreSchedCookie(namespace, podName, podUID, containerName, containerID, groupID string, cookieID uint64) { + labels := genNodeLabels() + if labels == nil { + return + } + labels[PodNamespace] = namespace + labels[PodName] = podName + labels[PodUID] = podUID + labels[ContainerName] = containerName + labels[ContainerID] = containerID + labels[CoreSchedGroupKey] = groupID + labels[CoreSchedCookieKey] = strconv.FormatUint(cookieID, 10) + ContainerCoreSchedCookie.WithSet(labels, 1.0) +} + +func ResetContainerCoreSchedCookie(namespace, podName, podUID, containerName, containerID, groupID string, cookieID uint64) { + labels := genNodeLabels() + if labels == nil { + return + } + labels[PodNamespace] = namespace + labels[PodName] = podName + labels[PodUID] = podUID + labels[ContainerName] = containerName + labels[ContainerID] = containerID + labels[CoreSchedGroupKey] = groupID + labels[CoreSchedCookieKey] = strconv.FormatUint(cookieID, 10) + ContainerCoreSchedCookie.Delete(labels) +} + +func RecordCoreSchedCookieManageStatus(groupID string, isSucceeded bool) { + labels := genNodeLabels() + if labels == nil { + return + } + labels[CoreSchedGroupKey] = groupID + labels[StatusKey] = StatusSucceed + if !isSucceeded { + labels[StatusKey] = StatusFailed + } + CoreSchedCookieManageStatus.WithInc(labels) +} diff --git a/pkg/koordlet/metrics/metrics.go b/pkg/koordlet/metrics/metrics.go index c557f3d65..aaff8c645 100644 --- a/pkg/koordlet/metrics/metrics.go +++ b/pkg/koordlet/metrics/metrics.go @@ -32,6 +32,7 @@ func init() { prometheus.MustRegister(CPUSuppressCollector...) prometheus.MustRegister(CPUBurstCollector...) prometheus.MustRegister(PredictionCollectors...) + prometheus.MustRegister(CoreSchedCollector...) 
} const ( diff --git a/pkg/koordlet/metrics/metrics_test.go b/pkg/koordlet/metrics/metrics_test.go index 281fa67be..20d887c4c 100644 --- a/pkg/koordlet/metrics/metrics_test.go +++ b/pkg/koordlet/metrics/metrics_test.go @@ -27,6 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" apiext "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" "github.com/koordinator-sh/koordinator/pkg/util" ) @@ -287,3 +288,53 @@ func TestPredictorCollectors(t *testing.T) { RecordNodePredictedResourceReclaimable(string(corev1.ResourceMemory), UnitByte, "testPredictor", float64(testNodeReclaimable.Memory().Value())) }) } + +func TestCoreSchedCollector(t *testing.T) { + testCoreSchedGroup := "test-core-sched-group" + testCoreSchedCookie := uint64(2000000000) + testingNode := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + Labels: map[string]string{}, + }, + Status: corev1.NodeStatus{ + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100"), + corev1.ResourceMemory: resource.MustParse("200Gi"), + apiext.BatchCPU: resource.MustParse("50000"), + apiext.BatchMemory: resource.MustParse("80Gi"), + }, + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100"), + corev1.ResourceMemory: resource.MustParse("200Gi"), + apiext.BatchCPU: resource.MustParse("50000"), + apiext.BatchMemory: resource.MustParse("80Gi"), + }, + }, + } + testingPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-ns", + UID: "xxxxxx", + Labels: map[string]string{ + slov1alpha1.LabelCoreSchedGroupID: testCoreSchedGroup, + }, + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://ccccccccc", + }, + }, + }, + } + t.Run("test", func(t *testing.T) { + Register(testingNode) + defer Register(nil) + RecordContainerCoreSchedCookie(testingPod.Namespace, testingPod.Name, string(testingPod.UID), + testingPod.Status.ContainerStatuses[0].Name, testingPod.Status.ContainerStatuses[0].ContainerID, + testCoreSchedGroup, testCoreSchedCookie) + }) +} diff --git a/pkg/koordlet/resourceexecutor/cgroup.go b/pkg/koordlet/resourceexecutor/cgroup.go index 1c9340267..094d42115 100644 --- a/pkg/koordlet/resourceexecutor/cgroup.go +++ b/pkg/koordlet/resourceexecutor/cgroup.go @@ -170,6 +170,7 @@ func readCgroupAndParseUint64(parentDir string, r sysutil.Resource) (uint64, err // ReadCgroupAndParseInt32Slice reads the given cgroup content and parses it into an int32 slice. // e.g. content: "1\n23\n0\n4\n56789" -> []int32{ 1, 23, 0, 4, 56789 } +// TODO: refactor via Generics. 
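A minimal sketch of the generics refactor the TODO above points at, assuming Go 1.18+ and that fmt, strconv and strings are already imported in this file; parseNewlineSeparatedInts is a hypothetical helper, not part of the patch.

func parseNewlineSeparatedInts[T int32 | uint32 | int64 | uint64](content string) ([]T, error) {
	// handles both the cpu tasks content "1\n23\n0\n4\n56789" and the
	// cgroup.procs content "7742\n10971\n11049\n11051..."
	var values []T
	for _, line := range strings.Split(strings.TrimSpace(content), "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		// task IDs and PIDs fit in the signed 64-bit range, so ParseInt suffices
		v, err := strconv.ParseInt(line, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse %q, err: %w", line, err)
		}
		values = append(values, T(v))
	}
	return values, nil
}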
func readCgroupAndParseInt32Slice(parentDir string, r sysutil.Resource) ([]int32, error) { s, err := cgroupFileRead(parentDir, r) if err != nil { diff --git a/pkg/koordlet/resourceexecutor/reader.go b/pkg/koordlet/resourceexecutor/reader.go index 6cbd2f181..d19068eb5 100644 --- a/pkg/koordlet/resourceexecutor/reader.go +++ b/pkg/koordlet/resourceexecutor/reader.go @@ -37,6 +37,7 @@ type CgroupReader interface { ReadMemoryStat(parentDir string) (*sysutil.MemoryStatRaw, error) ReadMemoryNumaStat(parentDir string) ([]sysutil.NumaMemoryPages, error) ReadCPUTasks(parentDir string) ([]int32, error) + ReadCPUProcs(parentDir string) ([]uint32, error) ReadPSI(parentDir string) (*PSIByResource, error) ReadMemoryColdPageUsage(parentDir string) (uint64, error) } @@ -69,32 +70,6 @@ func (r *CgroupV1Reader) ReadCPUShares(parentDir string) (int64, error) { return readCgroupAndParseInt64(parentDir, resource) } -func (r *CgroupV1Reader) ReadPSI(parentDir string) (*PSIByResource, error) { - cpuPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUAcctCPUPressureName) - if !ok { - return nil, ErrResourceNotRegistered - } - memPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUAcctMemoryPressureName) - if !ok { - return nil, ErrResourceNotRegistered - } - ioPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUAcctIOPressureName) - if !ok { - return nil, ErrResourceNotRegistered - } - - paths := PSIPath{ - CPU: cpuPressureResource.Path(parentDir), - Mem: memPressureResource.Path(parentDir), - IO: ioPressureResource.Path(parentDir), - } - psi, err := getPSIByResource(paths) - if err != nil { - return nil, err - } - return psi, nil -} - func (r *CgroupV1Reader) ReadCPUSet(parentDir string) (*cpuset.CPUSet, error) { resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUSetCPUSName) if !ok { @@ -190,6 +165,22 @@ func (r *CgroupV1Reader) ReadMemoryNumaStat(parentDir string) ([]sysutil.NumaMem return v, nil } +func (r *CgroupV1Reader) ReadMemoryColdPageUsage(parentDir string) (uint64, error) { + resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.MemoryIdlePageStatsName) + if !ok { + return 0, ErrResourceNotRegistered + } + s, err := cgroupFileRead(parentDir, resource) + if err != nil { + return 0, err + } + v, err := sysutil.ParseMemoryIdlePageStats(s) + if err != nil { + return 0, err + } + return v.GetColdPageTotalBytes(), nil +} + func (r *CgroupV1Reader) ReadCPUTasks(parentDir string) ([]int32, error) { resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUTasksName) if !ok { @@ -199,20 +190,44 @@ func (r *CgroupV1Reader) ReadCPUTasks(parentDir string) ([]int32, error) { return readCgroupAndParseInt32Slice(parentDir, resource) } -func (r *CgroupV1Reader) ReadMemoryColdPageUsage(parentDir string) (uint64, error) { - resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.MemoryIdlePageStatsName) +func (r *CgroupV1Reader) ReadCPUProcs(parentDir string) ([]uint32, error) { + resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUProcsName) if !ok { - return 0, ErrResourceNotRegistered + return nil, ErrResourceNotRegistered } s, err := cgroupFileRead(parentDir, resource) if err != nil { - return 0, err + return nil, err } - v, err := sysutil.ParseMemoryIdlePageStats(s) + + // content: `7742\n10971\n11049\n11051...` + return sysutil.ParseCgroupProcs(s) +} + +func (r *CgroupV1Reader) 
ReadPSI(parentDir string) (*PSIByResource, error) { + cpuPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUAcctCPUPressureName) + if !ok { + return nil, ErrResourceNotRegistered + } + memPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUAcctMemoryPressureName) + if !ok { + return nil, ErrResourceNotRegistered + } + ioPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.CPUAcctIOPressureName) + if !ok { + return nil, ErrResourceNotRegistered + } + + paths := PSIPath{ + CPU: cpuPressureResource.Path(parentDir), + Mem: memPressureResource.Path(parentDir), + IO: ioPressureResource.Path(parentDir), + } + psi, err := getPSIByResource(paths) if err != nil { - return 0, err + return nil, err } - return v.GetColdPageTotalBytes(), nil + return psi, nil } var _ CgroupReader = &CgroupV2Reader{} @@ -367,6 +382,11 @@ func (r *CgroupV2Reader) ReadMemoryNumaStat(parentDir string) ([]sysutil.NumaMem return v, nil } +func (r *CgroupV2Reader) ReadMemoryColdPageUsage(parentDir string) (uint64, error) { + // cgroup v2 has not implemented yet + return 0, ErrResourceNotRegistered +} + func (r *CgroupV2Reader) ReadCPUTasks(parentDir string) ([]int32, error) { resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV2, sysutil.CPUTasksName) if !ok { @@ -376,6 +396,20 @@ func (r *CgroupV2Reader) ReadCPUTasks(parentDir string) ([]int32, error) { return readCgroupAndParseInt32Slice(parentDir, resource) } +func (r *CgroupV2Reader) ReadCPUProcs(parentDir string) ([]uint32, error) { + resource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV2, sysutil.CPUProcsName) + if !ok { + return nil, ErrResourceNotRegistered + } + s, err := cgroupFileRead(parentDir, resource) + if err != nil { + return nil, err + } + + // content: `7742\n10971\n11049\n11051...` + return sysutil.ParseCgroupProcs(s) +} + func (r *CgroupV2Reader) ReadPSI(parentDir string) (*PSIByResource, error) { cpuPressureResource, ok := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV2, sysutil.CPUAcctCPUPressureName) if !ok { @@ -402,11 +436,6 @@ func (r *CgroupV2Reader) ReadPSI(parentDir string) (*PSIByResource, error) { return psi, nil } -// cgroup v2 has not implemented yet -func (r *CgroupV2Reader) ReadMemoryColdPageUsage(parentDir string) (uint64, error) { - return 0, ErrResourceNotRegistered -} - func NewCgroupReader() CgroupReader { if sysutil.GetCurrentCgroupVersion() == sysutil.CgroupVersionV2 { return &CgroupV2Reader{} diff --git a/pkg/koordlet/resourceexecutor/updater.go b/pkg/koordlet/resourceexecutor/updater.go index 7b3b0ca2e..48d8643e4 100644 --- a/pkg/koordlet/resourceexecutor/updater.go +++ b/pkg/koordlet/resourceexecutor/updater.go @@ -46,6 +46,7 @@ func init() { DefaultCgroupUpdaterFactory.Register(NewCommonCgroupUpdater, sysutil.CPUBurstName, sysutil.CPUBVTWarpNsName, + sysutil.CPUIdleName, sysutil.CPUTasksName, sysutil.CPUProcsName, sysutil.MemoryWmarkRatioName, diff --git a/pkg/koordlet/runtimehooks/config.go b/pkg/koordlet/runtimehooks/config.go index 2c46940c7..cd99502ee 100644 --- a/pkg/koordlet/runtimehooks/config.go +++ b/pkg/koordlet/runtimehooks/config.go @@ -26,6 +26,7 @@ import ( "github.com/koordinator-sh/koordinator/pkg/features" "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/batchresource" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/coresched" "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/cpunormalization" 
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/cpuset" "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/gpu" @@ -66,6 +67,12 @@ const ( // owner: @saintube @zwzhang0107 // alpha: v1.4 CPUNormalization featuregate.Feature = "CPUNormalization" + + // CoreSched manages Linux Core Scheduling cookies for containers who enable the core sched. + // + // owner: @saintube @zwzhang0107 + // alpha: v1.4 + CoreSched featuregate.Feature = "CoreSched" ) var ( @@ -75,6 +82,7 @@ var ( GPUEnvInject: {Default: false, PreRelease: featuregate.Alpha}, BatchResource: {Default: true, PreRelease: featuregate.Beta}, CPUNormalization: {Default: false, PreRelease: featuregate.Alpha}, + CoreSched: {Default: false, PreRelease: featuregate.Alpha}, } runtimeHookPlugins = map[featuregate.Feature]HookPlugin{ @@ -83,6 +91,7 @@ var ( GPUEnvInject: gpu.Object(), BatchResource: batchresource.Object(), CPUNormalization: cpunormalization.Object(), + CoreSched: coresched.Object(), } ) diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/cookie_cache.go b/pkg/koordlet/runtimehooks/hooks/coresched/cookie_cache.go new file mode 100644 index 000000000..dae9aa83a --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/cookie_cache.go @@ -0,0 +1,164 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package coresched + +import ( + "sort" + "sync" + + sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +// CookieCacheEntry is an entry which stores the cookie ID and its belonging PIDs. +type CookieCacheEntry struct { + rwMutex sync.RWMutex + cookieID uint64 + pidCache *PIDCache +} + +func newCookieCacheEntry(cookieID uint64, pids ...uint32) *CookieCacheEntry { + m := &PIDCache{} + m.AddAny(pids...) + return &CookieCacheEntry{ + cookieID: cookieID, + pidCache: m, + } +} + +func (c *CookieCacheEntry) DeepCopy() *CookieCacheEntry { + c.rwMutex.RLock() + defer c.rwMutex.RUnlock() + copiedM := c.pidCache.DeepCopy() + return &CookieCacheEntry{ + cookieID: c.cookieID, + pidCache: copiedM, + } +} + +func (c *CookieCacheEntry) GetCookieID() uint64 { + c.rwMutex.RLock() + defer c.rwMutex.RUnlock() + return c.cookieID +} + +func (c *CookieCacheEntry) SetCookieID(cookieID uint64) { + c.rwMutex.Lock() + defer c.rwMutex.Unlock() + c.cookieID = cookieID +} + +func (c *CookieCacheEntry) IsEntryInvalid() bool { + c.rwMutex.RLock() + defer c.rwMutex.RUnlock() + return c.cookieID <= sysutil.DefaultCoreSchedCookieID || c.pidCache.Len() <= 0 +} + +func (c *CookieCacheEntry) HasPID(pid uint32) bool { + c.rwMutex.RLock() + defer c.rwMutex.RUnlock() + return c.pidCache.Has(pid) +} + +func (c *CookieCacheEntry) ContainsPIDs(pids ...uint32) []uint32 { + c.rwMutex.RLock() + defer c.rwMutex.RUnlock() + var notFoundPIDs []uint32 + for _, pid := range pids { + if !c.pidCache.Has(pid) { + notFoundPIDs = append(notFoundPIDs, pid) + } + } + return notFoundPIDs +} + +// GetAllPIDs gets all PIDs sorted in ascending order. 
+func (c *CookieCacheEntry) GetAllPIDs() []uint32 { + c.rwMutex.RLock() + defer c.rwMutex.RUnlock() + return c.pidCache.GetAllSorted() +} + +func (c *CookieCacheEntry) AddPIDs(pids ...uint32) { + if len(pids) <= 0 { + return + } + c.rwMutex.Lock() + defer c.rwMutex.Unlock() + c.pidCache.AddAny(pids...) +} + +func (c *CookieCacheEntry) DeletePIDs(pids ...uint32) { + if len(pids) <= 0 { + return + } + c.rwMutex.Lock() + defer c.rwMutex.Unlock() + c.pidCache.DeleteAny(pids...) +} + +type PIDCache map[uint32]struct{} + +func NewPIDCache(pids ...uint32) *PIDCache { + p := &PIDCache{} + p.AddAny(pids...) + return p +} + +func (p PIDCache) DeepCopy() *PIDCache { + copiedM := map[uint32]struct{}{} + for pid := range p { + copiedM[pid] = struct{}{} + } + return (*PIDCache)(&copiedM) +} + +func (p PIDCache) Len() int { + return len(p) +} + +func (p PIDCache) Has(pid uint32) bool { + _, ok := p[pid] + return ok +} + +func (p PIDCache) GetAllSorted() []uint32 { + if len(p) <= 0 { + return nil + } + pids := make([]uint32, len(p)) + i := 0 + for pid := range p { + pids[i] = pid + i++ + } + sort.Slice(pids, func(i, j int) bool { + return pids[i] < pids[j] + }) + return pids +} + +func (p PIDCache) AddAny(pids ...uint32) { + for _, pid := range pids { + p[pid] = struct{}{} + } +} + +func (p PIDCache) DeleteAny(pids ...uint32) { + for _, pid := range pids { + delete(p, pid) + } +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/cookie_cache_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/cookie_cache_test.go new file mode 100644 index 000000000..844d4a37d --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/cookie_cache_test.go @@ -0,0 +1,260 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package coresched + +import ( + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCookieCacheEntry(t *testing.T) { + type args struct { + initCookieID uint64 + orderedPIDs []uint32 + setCookieID uint64 + addPIDs []uint32 + } + type expects struct { + expectInvalid bool + expectOrderedPIDsAdded []uint32 + expectOrderedPIDsDeleted []uint32 + } + tests := []struct { + name string + args args + expects expects + }{ + { + name: "empty entry is invalid", + args: args{ + initCookieID: 0, + }, + expects: expects{ + expectInvalid: true, + }, + }, + { + name: "valid entry", + args: args{ + initCookieID: 100000000, + orderedPIDs: []uint32{ + 1, + 2, + 1000, + 1001, + 1010, + 1100, + 2000, + 2002, + }, + setCookieID: 100000000, + }, + expects: expects{ + expectInvalid: false, + expectOrderedPIDsAdded: []uint32{ + 1, + 2, + 1000, + 1001, + 1010, + 1100, + 2000, + 2002, + }, + expectOrderedPIDsDeleted: []uint32{ + 1, + 2, + 1000, + 1001, + 1010, + 1100, + 2000, + 2002, + }, + }, + }, + { + name: "valid entry add", + args: args{ + initCookieID: 100000000, + orderedPIDs: []uint32{ + 10000, + 10010, + }, + setCookieID: 200000000, + addPIDs: []uint32{ + 10001, + }, + }, + expects: expects{ + expectInvalid: false, + expectOrderedPIDsAdded: []uint32{ + 10000, + 10001, + 10010, + }, + expectOrderedPIDsDeleted: []uint32{ + 10000, + 10010, + }, + }, + }, + { + name: "valid entry add 1", + args: args{ + initCookieID: 100000000, + orderedPIDs: []uint32{ + 10, + 1000, + 1001, + 1010, + 1100, + 2000, + 2002, + }, + setCookieID: 100000000, + addPIDs: []uint32{ + 3, + 1011, + 3000, + }, + }, + expects: expects{ + expectInvalid: false, + expectOrderedPIDsAdded: []uint32{ + 3, + 10, + 1000, + 1001, + 1010, + 1011, + 1100, + 2000, + 2002, + 3000, + }, + expectOrderedPIDsDeleted: []uint32{ + 10, + 1000, + 1001, + 1010, + 1100, + 2000, + 2002, + }, + }, + }, + { + name: "valid entry add 2", + args: args{ + initCookieID: 100000000, + orderedPIDs: []uint32{ + 10, + 1000, + 1001, + 1100, + 2000, + 2002, + }, + setCookieID: 100000000, + addPIDs: []uint32{ + 1001, + 1010, + 3100, + 3000, + }, + }, + expects: expects{ + expectInvalid: false, + expectOrderedPIDsAdded: []uint32{ + 10, + 1000, + 1001, + 1010, + 1100, + 2000, + 2002, + 3000, + 3100, + }, + expectOrderedPIDsDeleted: []uint32{ + 10, + 1000, + 1100, + 2000, + 2002, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pidsOO := testGetOutOfOrderUint32Slice(tt.args.orderedPIDs) + + // ordered after init + entry := newCookieCacheEntry(tt.args.initCookieID, pidsOO...) + assert.NotNil(t, entry) + assert.Equal(t, tt.args.initCookieID, entry.GetCookieID()) + assert.Equal(t, tt.args.orderedPIDs, entry.GetAllPIDs(), pidsOO) + + // check valid + assert.Equal(t, tt.expects.expectInvalid, entry.IsEntryInvalid()) + + // set cookie + entry.SetCookieID(tt.args.setCookieID) + assert.Equal(t, tt.args.setCookieID, entry.GetCookieID()) + + // pid exists after init + if len(tt.args.orderedPIDs) > 0 { + assert.True(t, entry.HasPID(tt.args.orderedPIDs[0])) + } + + // ordered after add + entry.AddPIDs(tt.args.addPIDs...) + assert.Equal(t, tt.expects.expectOrderedPIDsAdded, entry.GetAllPIDs()) + + // pid exists after add + if len(tt.args.addPIDs) > 0 { + assert.True(t, entry.HasPID(tt.args.addPIDs[0])) + } + + // ordered after delete + entry.DeletePIDs(tt.args.addPIDs...) 
+ assert.Equal(t, tt.expects.expectOrderedPIDsDeleted, entry.GetAllPIDs()) + + // pid not exists after delete + if len(tt.args.addPIDs) > 0 { + assert.False(t, entry.HasPID(tt.args.addPIDs[0])) + } + + // deep copy + c := entry.DeepCopy() + assert.Equal(t, c, entry) + }) + } +} + +func testGetOutOfOrderUint32Slice(ss []uint32) []uint32 { + s := make([]uint32, len(ss)) + copy(s, ss) + rand.Shuffle(len(s), func(i, j int) { + s[i], s[j] = s[j], s[i] + }) + return s +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go new file mode 100644 index 000000000..e9b864c81 --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go @@ -0,0 +1,495 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package coresched + +import ( + "fmt" + "sync" + "time" + + gocache "github.com/patrickmn/go-cache" + "go.uber.org/atomic" + "k8s.io/klog/v2" + "k8s.io/utils/pointer" + + "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metrics" + "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/reconciler" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/rule" + "github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer" + "github.com/koordinator-sh/koordinator/pkg/koordlet/util" + sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +const ( + name = "CoreSched" + description = "manage core sched cookies for pod and containers" + + ruleNameForNodeSLO = name + " (nodeSLO)" + ruleNameForAllPods = name + " (allPods)" + + defaultCacheExpiration = 300 * time.Second + defaultCacheDeleteInterval = 600 * time.Second + + // ExpellerGroupSuffix is the default suffix of the expeller core sched group. + ExpellerGroupSuffix = "-expeller" +) + +// SYSTEM QoS is excluded from the cookie mutating. +// All SYSTEM pods use the default cookie so the agent can reset the cookie of a container by ShareTo its cookie to +// the target. +var podQOSConditions = []string{string(extension.QoSBE), string(extension.QoSLS), string(extension.QoSLSR), + string(extension.QoSLSE), string(extension.QoSNone)} + +// Plugin is responsible for managing core sched cookies and cpu.idle for containers. 
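A sketch of the internal group naming implied by ExpellerGroupSuffix, assuming the real derivation lives in the unexported getPodEnabledAndGroup helper used below (not shown in this diff): in the tests, an LS pod labelled "group-xxx" is managed under "group-xxx-expeller" while a BE pod with the same label stays in "group-xxx", so the two never share a cookie. exampleInternalGroupID is a hypothetical helper.

func exampleInternalGroupID(labelGroupID string, coreExpelled bool) string {
	if coreExpelled {
		// pods whose CoreExpelled status is enabled get the suffix, so they take a
		// different cookie from non-expelled pods even with the same group label
		return labelGroupID + ExpellerGroupSuffix
	}
	return labelGroupID
}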
+type Plugin struct { + rule *Rule + + initialized *atomic.Bool // whether the cache has been initialized + allPodsSyncOnce sync.Once // sync once for AllPods + + sysSupported *bool + supportedMsg string + sysEnabled bool + + cookieCache *gocache.Cache // core-sched-group-id -> cookie id, set; if the group has had cookie + cookieCacheRWMutex sync.RWMutex + groupCache *gocache.Cache // pod-uid+container-id -> core-sched-group-id (note that it caches the last state); if the container has had cookie of the group + + reader resourceexecutor.CgroupReader + executor resourceexecutor.ResourceUpdateExecutor + cse sysutil.CoreSchedExtendedInterface +} + +var singleton *Plugin + +func Object() *Plugin { + if singleton == nil { + singleton = newPlugin() + } + return singleton +} + +func newPlugin() *Plugin { + return &Plugin{ + rule: newRule(), + cookieCache: gocache.New(defaultCacheExpiration, defaultCacheDeleteInterval), + groupCache: gocache.New(defaultCacheExpiration, defaultCacheDeleteInterval), + initialized: atomic.NewBool(false), + allPodsSyncOnce: sync.Once{}, + } +} + +func (p *Plugin) Register(op hooks.Options) { + klog.V(5).Infof("register hook %v", name) + // TODO: hook NRI events RunPodSandbox, PostStartContainer + rule.Register(ruleNameForNodeSLO, description, + rule.WithParseFunc(statesinformer.RegisterTypeNodeSLOSpec, p.parseRuleForNodeSLO), + rule.WithUpdateCallback(p.ruleUpdateCb)) + rule.Register(ruleNameForAllPods, description, + rule.WithParseFunc(statesinformer.RegisterTypeAllPods, p.parseForAllPods), + rule.WithUpdateCallback(p.ruleUpdateCb)) + reconciler.RegisterCgroupReconciler(reconciler.ContainerLevel, sysutil.VirtualCoreSchedCookie, + "set core sched cookie to process groups of container specified", + p.SetContainerCookie, reconciler.PodQOSFilter(), podQOSConditions...) + reconciler.RegisterCgroupReconciler(reconciler.SandboxLevel, sysutil.VirtualCoreSchedCookie, + "set core sched cookie to process groups of sandbox container specified", + p.SetContainerCookie, reconciler.PodQOSFilter(), podQOSConditions...) 
+ // TODO: support host application + reconciler.RegisterCgroupReconciler(reconciler.KubeQOSLevel, sysutil.CPUIdle, "reconcile QoS level cpu idle", + p.SetKubeQOSCPUIdle, reconciler.NoneFilter()) + p.Setup(op) +} + +func (p *Plugin) Setup(op hooks.Options) { + p.reader = op.Reader + p.executor = op.Executor + p.cse = sysutil.NewCoreSchedExtended() +} + +func (p *Plugin) SystemSupported() (bool, string) { + if p.sysSupported == nil { + isSupported, msg := sysutil.EnableCoreSchedIfSupported() + p.sysSupported = pointer.Bool(isSupported) + p.supportedMsg = msg + klog.Infof("update system supported info for plugin %s, supported %v, msg %s", + name, *p.sysSupported, p.supportedMsg) + } + return *p.sysSupported, p.supportedMsg +} + +func (p *Plugin) InitCache(podMetas []*statesinformer.PodMeta) bool { + if p.initialized.Load() { + return true + } + + synced := p.LoadAllCookies(podMetas) + + p.initialized.Store(synced) + return synced +} + +func (p *Plugin) IsCacheInited() bool { + return p.initialized.Load() +} + +func (p *Plugin) SetKubeQOSCPUIdle(proto protocol.HooksProtocol) error { + kubeQOSCtx := proto.(*protocol.KubeQOSContext) + if kubeQOSCtx == nil { + return fmt.Errorf("kubeQOS protocol is nil for plugin %s", name) + } + kubeQOS := kubeQOSCtx.Request.KubeQOSClass + + if !p.rule.IsInited() { + klog.V(5).Infof("plugin %s has not been inited, rule inited %v, aborted to set cpu idle for QoS %s", + name, p.rule.IsInited(), kubeQOS) + return nil + } + + isCPUIdle := p.rule.IsKubeQOSCPUIdle(kubeQOS) + if isCPUIdle { + kubeQOSCtx.Response.Resources.CPUIdle = pointer.Int64(1) + } else { + kubeQOSCtx.Response.Resources.CPUIdle = pointer.Int64(0) + } + + return nil +} + +// SetContainerCookie reconciles the core sched cookie for the container. +// There are the following operations about the cookies: +// 1. Get: Get the cookie for a core sched group, firstly try finding in cache and then get from the existing PIDs. +// 2. Add: Add a new cookie for a core sched group for a container, and add a new entry into the cache. +// 3. Assign: Assign a cookie of an existing core sched group for a container and update the cache entry. +// The cached sibling PIDs (i.e. the PIDs of the same core sched group) will be fetched in the Assign. If all +// cookies of the sibling PIDs are default or invalid, the Assign should fall back to Add. +// 4. Clear: Clear a cookie of an existing core sched group for a container (reset to default cookie 0), and the +// containers' PIDs are removed from the cache. The cache entry of the group is removed when the number of the +// cached PIDs decreases to zero. +// +// If multiple non-default cookies are assigned to existing containers of the same group, the firstly-created and +// available cookie will be retained and the PIDs of others will be moved to the former. +// NOTE: The agent itself should be set the default cookie. It can be excluded by setting QoS to SYSTEM. 
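A condensed view of the reconciliation flow described above, kept as a comment sketch rather than an implementation; the Add/Assign/Clear operations map onto the unexported addCookie, assignCookie and clearCookie helpers used below, which presumably drive the PR_SCHED_CORE prctl interface from the linked kernel document.

//	isEnabled, groupID := p.getPodEnabledAndGroup(...)
//	switch {
//	case !isEnabled:
//	    // Clear: reset the container's PIDs to the default cookie `0` and drop them
//	    // from the cache; the group entry goes away once its PID count reaches zero
//	case the cookie cache already has an entry for groupID:
//	    // Assign: share the existing cookie to the PIDs via a cached sibling PID,
//	    // falling back to Add when no sibling still holds a valid cookie
//	default:
//	    // Add: create a new cookie for the group and cache it with the new PIDs
//	}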
+func (p *Plugin) SetContainerCookie(proto protocol.HooksProtocol) error { + containerCtx := proto.(*protocol.ContainerContext) + if containerCtx == nil { + return fmt.Errorf("container protocol is nil for plugin %s", name) + } + if !util.IsValidContainerCgroupDir(containerCtx.Request.CgroupParent) { + return fmt.Errorf("invalid container cgroup parent %s for plugin %s", containerCtx.Request.CgroupParent, name) + } + + podUID := containerCtx.Request.PodMeta.UID + // only process sandbox container or container has valid ID + if len(podUID) <= 0 || len(containerCtx.Request.ContainerMeta.ID) <= 0 { + return fmt.Errorf("invalid container ID for plugin %s, pod UID %s, container ID %s", + name, podUID, containerCtx.Request.ContainerMeta.ID) + } + + if !p.rule.IsInited() || !p.IsCacheInited() { + klog.V(5).Infof("plugin %s has not been inited, rule inited %v, cache inited %v, aborted to set cookie for container %s/%s", + name, p.rule.IsInited(), p.IsCacheInited(), containerCtx.Request.PodMeta.String(), containerCtx.Request.ContainerMeta.Name) + return nil + } + + isEnabled, groupID := p.getPodEnabledAndGroup(containerCtx.Request.PodAnnotations, containerCtx.Request.PodLabels, + util.GetKubeQoSByCgroupParent(containerCtx.Request.CgroupParent), podUID) + klog.V(6).Infof("manage cookie for container %s/%s, isEnabled %v, groupID %s", + containerCtx.Request.PodMeta.String(), containerCtx.Request.ContainerMeta.Name, isEnabled, groupID) + + // expect enabled + // 1. disabled -> enabled: Add or Assign. + // 2. keep enabled: Check the differences of cookie, group ID and the PIDs, and do Assign. + if isEnabled { + return p.enableContainerCookie(containerCtx, groupID) + } + // else pod disables + + return p.disableContainerCookie(containerCtx, groupID) +} + +// LoadAllCookies syncs the current core sched cookies of all pods into the cookie cache. +func (p *Plugin) LoadAllCookies(podMetas []*statesinformer.PodMeta) bool { + hasSynced := false + p.cookieCacheRWMutex.Lock() + defer p.cookieCacheRWMutex.Unlock() + for _, podMeta := range podMetas { + pod := podMeta.Pod + podAnnotations := pod.Annotations + podLabels := pod.Labels + podUID := string(pod.UID) + + if !podMeta.IsRunningOrPending() { + klog.V(6).Infof("skip sync core sched cookie for pod %s, pod is non-running, phase %s", + podMeta.Key(), pod.Status.Phase) + continue + } + + isEnabled, groupID := p.getPodEnabledAndGroup(podAnnotations, podLabels, extension.GetKubeQosClass(pod), podUID) + + containerPIDs := p.getAllContainerPIDs(podMeta) + + for _, cPID := range containerPIDs { + containerID := cPID.ContainerID + pids := cPID.PID + if len(pids) <= 0 { + klog.V(5).Infof("aborted to get PIDs for container %s/%s, err: no available PID", + podMeta.Key(), containerID) + continue + } + + cookieID, pidsSynced, err := p.getCookie(pids, groupID) + if err != nil { + klog.V(0).Infof("failed to sync cookie for container %s/%s, err: %s", + podMeta.Key(), containerID, err) + continue + } + + // container synced including using the default cookie + hasSynced = true + + if cookieID <= sysutil.DefaultCoreSchedCookieID { + klog.V(6).Infof("skipped to sync cookie for container %s/%s, default cookie is set, enabled %v, group %s", + podMeta.Key(), containerID, isEnabled, groupID) + continue + } + + var cookieEntry *CookieCacheEntry + cookieEntryIf, groupHasCookie := p.cookieCache.Get(groupID) + if groupHasCookie { + cookieEntry = cookieEntryIf.(*CookieCacheEntry) + // If multiple cookie exists for a group, aborted to sync cache. Let the reconciliation fix these. 
+ if lastCookieID := cookieEntry.GetCookieID(); lastCookieID > sysutil.DefaultCoreSchedCookieID && + lastCookieID != cookieID { + klog.Warningf("sync cookie for container %s/%s failed, isEnabled %v, groupID %s, cookie %v, but got existing cookie %v", + podMeta.Key(), containerID, isEnabled, groupID, cookieID, lastCookieID) + continue + } + cookieEntry.AddPIDs(pidsSynced...) + } else { + cookieEntry = newCookieCacheEntry(cookieID, pidsSynced...) + } + + p.cookieCache.SetDefault(groupID, cookieEntry) + containerUID := p.getContainerUID(podUID, containerID) + p.groupCache.SetDefault(containerUID, groupID) + klog.V(4).Infof("sync cookie for container %s/%s finished, isEnabled %v, groupID %s, cookie %v", + podMeta.Key(), containerID, isEnabled, groupID, cookieID) + metrics.RecordContainerCoreSchedCookie(pod.Namespace, pod.Name, podUID, cPID.ContainerName, containerID, + groupID, cookieID) + } + } + + return hasSynced +} + +// enableContainerCookie adds or assigns a core sched cookie for the container. +func (p *Plugin) enableContainerCookie(containerCtx *protocol.ContainerContext, groupID string) error { + podMetaName := containerCtx.Request.PodMeta.String() + containerName := containerCtx.Request.ContainerMeta.Name + podUID := containerCtx.Request.PodMeta.UID + containerUID := p.getContainerUID(podUID, containerCtx.Request.ContainerMeta.ID) + lastGroupID, _, cookieEntry := p.getCookieCacheForContainer(groupID, containerUID) + + // assert groupID != "0" + // NOTE: if the group ID changed for a enabled pod, the cookie will be updated while the old PIDs should expire + // in the old cookie's cache. + pids, err := p.getContainerPIDs(containerCtx.Request.CgroupParent) + if err != nil { + klog.V(5).Infof("failed to get PIDs for container %s/%s, err: %s", podMetaName, containerName, err) + return nil + } + if len(pids) <= 0 { + klog.V(5).Infof("no PID found for container %s/%s, group %s", podMetaName, containerName, groupID) + return nil + } + + if cookieEntry != nil { + // firstly try Assign, if all cached sibling pids invalid, then try Add + // else cookie exists for group: + // 1. assign cookie if the container has not set cookie + // 2. assign cookie if some process of the container has missing cookie or set incoherent cookie + targetCookieID := cookieEntry.GetCookieID() + + if notFoundPIDs := cookieEntry.ContainsPIDs(pids...); len(notFoundPIDs) <= 0 { + klog.V(6).Infof("assign cookie for container %s/%s skipped, group %s, cookie %v, PID num %v", + podMetaName, containerName, groupID, cookieEntry.GetCookieID(), len(pids)) + p.updateCookieCacheForContainer(groupID, containerUID, cookieEntry) + recordContainerCookieMetrics(containerCtx, groupID, targetCookieID) + + return nil + } + + // do Assign + siblingPIDs := cookieEntry.GetAllPIDs() + pidsAssigned, sPIDsToDelete, err := p.assignCookie(pids, siblingPIDs, groupID, targetCookieID) + if err == nil { + if lastGroupID != groupID { + klog.V(4).Infof("assign cookie for container %s/%s finished, last group %s, group %s, cookie %v, PID num %v, assigned %v", + podMetaName, containerName, lastGroupID, groupID, targetCookieID, len(pids), len(pidsAssigned)) + } else { + klog.V(5).Infof("assign cookie for container %s/%s finished, cookie %v, PID num %v, assigned %v", + podMetaName, containerName, targetCookieID, len(pids), len(pidsAssigned)) + } + + if len(pidsAssigned) <= 0 { // no pid is successfully assigned + return nil + } + + cookieEntry.AddPIDs(pidsAssigned...) + cookieEntry.DeletePIDs(sPIDsToDelete...) 
+ p.updateCookieCacheForContainer(groupID, containerUID, cookieEntry) + recordContainerCookieMetrics(containerCtx, groupID, targetCookieID) + + return nil + } + + metrics.RecordCoreSchedCookieManageStatus(groupID, false) + klog.V(4).Infof("failed to assign cookie for container %s/%s, fallback to add new cookie, group %s, old cookie %v, PID num %v, err: %v", + podMetaName, containerName, groupID, targetCookieID, len(pids), err) + + // no valid sibling PID, fallback to Add + cookieEntry.DeletePIDs(sPIDsToDelete...) + p.cleanCookieCacheForContainer(groupID, containerUID, cookieEntry) + } + + // group has no cookie, do Add + cookieID, pidAdded, err := p.addCookie(pids, groupID) + if err != nil { + metrics.RecordCoreSchedCookieManageStatus(groupID, false) + klog.V(4).Infof("failed to add cookie for container %s/%s, group %s, PID num %v, err: %v", + podMetaName, containerName, groupID, len(pids), err) + return nil + } + if cookieID <= sysutil.DefaultCoreSchedCookieID { + klog.V(4).Infof("failed to add cookie for container %s/%s, group %s, PID num %v, got unexpected cookie %v", + podMetaName, containerName, groupID, len(pids), cookieID) + return nil + } + + cookieEntry = newCookieCacheEntry(cookieID, pidAdded...) + p.updateCookieCacheForContainer(groupID, containerUID, cookieEntry) + recordContainerCookieMetrics(containerCtx, groupID, cookieID) + + klog.V(4).Infof("add cookie for container %s/%s finished, group %s, cookie %v, PID num %v", + podMetaName, containerName, groupID, cookieID, len(pids)) + return nil +} + +// disableContainerCookie clears a core sched cookie for the container. +func (p *Plugin) disableContainerCookie(containerCtx *protocol.ContainerContext, groupID string) error { + podMetaName := containerCtx.Request.PodMeta.String() + containerName := containerCtx.Request.ContainerMeta.Name + podUID := containerCtx.Request.PodMeta.UID + containerUID := p.getContainerUID(podUID, containerCtx.Request.ContainerMeta.ID) + lastGroupID, lastCookieEntry, _ := p.getCookieCacheForContainer(groupID, containerUID) + + // invalid lastGroupID means container not in group cache (container should be cleared or not ever added) + // invalid lastCookieEntry means group not in cookie cache (group should be cleared) + // let its cached PIDs expire or removed by siblings' Assign + if (len(lastGroupID) <= 0 || lastGroupID == slov1alpha1.CoreSchedGroupIDNone) && lastCookieEntry == nil { + return nil + } + + pids, err := p.getContainerPIDs(containerCtx.Request.CgroupParent) + if err != nil { + klog.V(5).Infof("failed to get PIDs for container %s/%s, err: %s", + podMetaName, containerName, err) + return nil + } + if len(pids) <= 0 { + klog.V(5).Infof("no PID found for container %s/%s, group %s", podMetaName, containerName, groupID) + return nil + } + + // In case the pod has group set before while no cookie entry, do Clear to fix it + if lastCookieEntry == nil { + lastCookieEntry = newCookieCacheEntry(sysutil.DefaultCoreSchedCookieID) + } + lastCookieID := lastCookieEntry.GetCookieID() + + // do Clear: + // - clear cookie if any process of the container has set cookie + pidsToClear := p.clearCookie(pids, lastGroupID, lastCookieID) + lastCookieEntry.DeletePIDs(pidsToClear...) 
+ p.cleanCookieCacheForContainer(lastGroupID, containerUID, lastCookieEntry) + resetContainerCookieMetrics(containerCtx, lastGroupID, lastCookieID) + + klog.V(4).Infof("clear cookie for container %s/%s finished, last group %s, last cookie %v, PID num %v", + podMetaName, containerName, lastGroupID, lastCookieID, len(pids)) + + return nil +} + +// getCookieCacheForPod gets the last group ID, the last cookie entry and the cookie entry for the current group. +// If a pod has not set cookie before, return lastGroupID=0 and lastCookieEntry=nil. +func (p *Plugin) getCookieCacheForContainer(groupID, containerUID string) (string, *CookieCacheEntry, *CookieCacheEntry) { + p.cookieCacheRWMutex.RLock() + defer p.cookieCacheRWMutex.RUnlock() + + lastGroupIDIf, containerHasGroup := p.groupCache.Get(containerUID) + lastGroupID := slov1alpha1.CoreSchedGroupIDNone + if containerHasGroup { + lastGroupID = lastGroupIDIf.(string) + } + + lastCookieEntryIf, lastGroupHasCookie := p.cookieCache.Get(lastGroupID) + var lastCookieEntry *CookieCacheEntry + if lastGroupHasCookie { + lastCookieEntry = lastCookieEntryIf.(*CookieCacheEntry) + if lastCookieEntry.IsEntryInvalid() { // no valid cookie ref + lastCookieEntry = nil + } + } + + cookieEntryIf, groupHasCookie := p.cookieCache.Get(groupID) + var cookieEntry *CookieCacheEntry + if groupHasCookie { + cookieEntry = cookieEntryIf.(*CookieCacheEntry) + if cookieEntry.IsEntryInvalid() { // no valid cookie ref + cookieEntry = nil + } + } + + return lastGroupID, lastCookieEntry, cookieEntry +} + +func (p *Plugin) updateCookieCacheForContainer(groupID, containerUID string, cookieEntry *CookieCacheEntry) { + p.cookieCacheRWMutex.Lock() + defer p.cookieCacheRWMutex.Unlock() + p.groupCache.SetDefault(containerUID, groupID) + if cookieEntry.IsEntryInvalid() { + p.cookieCache.Delete(groupID) + } else { + p.cookieCache.SetDefault(groupID, cookieEntry) + } +} + +func (p *Plugin) cleanCookieCacheForContainer(groupID, containerUID string, cookieEntry *CookieCacheEntry) { + p.cookieCacheRWMutex.Lock() + defer p.cookieCacheRWMutex.Unlock() + p.groupCache.Delete(containerUID) + if cookieEntry.IsEntryInvalid() { + p.cookieCache.Delete(groupID) + } else { + p.cookieCache.SetDefault(groupID, cookieEntry) + } +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go new file mode 100644 index 000000000..a94d7cedd --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go @@ -0,0 +1,1951 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package coresched + +import ( + "sync" + "testing" + + gocache "github.com/patrickmn/go-cache" + "github.com/stretchr/testify/assert" + "go.uber.org/atomic" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" + + "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" + "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol" + "github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer" + "github.com/koordinator-sh/koordinator/pkg/koordlet/util" + sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +func TestPlugin(t *testing.T) { + t.Run("test", func(t *testing.T) { + p := newPlugin() + assert.NotNil(t, p) + p.Register(hooks.Options{ + Reader: resourceexecutor.NewCgroupReader(), + }) + }) +} + +func TestPluginSystemSupported(t *testing.T) { + type fields struct { + prepareFn func(helper *sysutil.FileTestUtil) + } + type wants struct { + systemSupported bool + supportMsg string + } + tests := []struct { + name string + fields fields + wants wants + }{ + { + name: "plugin unsupported since no sched features file", + wants: wants{ + systemSupported: false, + supportMsg: "core sched not supported", + }, + }, + { + name: "plugin unsupported since no core sched in sched features", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C`) + }, + }, + wants: wants{ + systemSupported: false, + supportMsg: "core sched not supported", + }, + }, + { + name: "plugin supported since core sched disabled but can be enabled by sysctl", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "0\n") + }, + }, + wants: wants{ + systemSupported: true, + supportMsg: "", + }, + }, + { + name: "plugin supported since core sched in sched features", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + }, + }, + wants: wants{ + systemSupported: true, + }, + }, + { + name: "plugin supported since core sched enabled by sysctl", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") + }, + }, + wants: wants{ + systemSupported: true, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := sysutil.NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + + p := newPlugin() + p.Setup(hooks.Options{ + Reader: resourceexecutor.NewCgroupReader(), + Executor: resourceexecutor.NewTestResourceExecutor(), + }) + sysSupported, supportMsg := p.SystemSupported() + assert.Equal(t, tt.wants.systemSupported, sysSupported) + assert.Equal(t, tt.wants.supportMsg, supportMsg) + }) + } +} + +func TestPlugin_SetContainerCookie(t *testing.T) { + type fields struct { + prepareFn func(helper *sysutil.FileTestUtil) + plugin *Plugin + 
preparePluginFn func(p *Plugin) + cse sysutil.CoreSchedExtendedInterface + groupID string + } + type wantFields struct { + cookieToPIDs map[uint64][]uint32 + groupToCookie map[string]uint64 + } + tests := []struct { + name string + fields fields + arg protocol.HooksProtocol + wantErr bool + wantFields wantFields + }{ + { + name: "container context invalid", + arg: (*protocol.ContainerContext)(nil), + wantErr: true, + }, + { + name: "invalid cgroup parent", + fields: fields{ + plugin: newPlugin(), + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + CgroupParent: "", + }, + }, + wantErr: true, + }, + { + name: "missing container ID", + fields: fields{ + plugin: newPlugin(), + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: true, + }, + { + name: "rule has not initialized", + fields: fields{ + plugin: newPlugin(), + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + ContainerMeta: protocol.ContainerMeta{ + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + }, + { + name: "add cookie for LS container correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(1000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + { + name: "failed to add cookie for LS container when core sched add failed", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p 
*Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(1000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12344: true, + 12345: true, + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: {}, + }, + groupToCookie: map[string]uint64{}, + }, + }, + { + name: "failed to add cookie for BE container when PIDs no longer exist", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(1000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSBE), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{}, + groupToCookie: map[string]uint64{}, + }, + }, + { + name: "assign cookie for LS container correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx-expeller", newCookieCacheEntry(1000000, 1000, 1001, 1002)) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 1000: 1000000, + 1001: 1000000, + 1002: 1000000, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 
1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 1000, + 1001, + 1002, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + { + name: "failed to assign cookie for LS container but fallback to add correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx-expeller", newCookieCacheEntry(1000000, 1000, 1001, 1002)) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 1000: true, + 1001: true, + 1002: true, + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: {}, + 2000000: { + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 2000000, + }, + }, + }, + { + name: "failed to assign cookie for LS container neither add", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx-expeller", newCookieCacheEntry(1000000, 1000, 1001, 1002)) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 
1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 1000: true, + 1001: true, + 1002: true, + 12344: true, + 12345: true, + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: {}, + }, + groupToCookie: map[string]uint64{}, + }, + }, + { + name: "clear cookie for LS container correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx-expeller", newCookieCacheEntry(1000000, 1000, 1001, 1002, 12344)) + p.groupCache.SetDefault("xxxxxx/containerd://yyyyyy", "group-xxx-expeller") + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 1000: 1000000, + 1001: 1000000, + 1002: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 1000, + 1001, + 1002, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + { + name: "clear cookie for LSR container correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.rule.podQOSParams[extension.QoSLSR] = Param{ + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: false, + } + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx", newCookieCacheEntry(1000000, 
1000, 1001, 1002, 12344)) + p.groupCache.SetDefault("xxxxxx/containerd://yyyyyy", "group-xxx") + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 1000: 1000000, + 1001: 1000000, + 1002: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 1000, + 1001, + 1002, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx": 1000000, + }, + }, + }, + { + name: "clear cookie for BE container correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx", newCookieCacheEntry(1000000, 1000, 1001, 1002, 12344)) + p.groupCache.SetDefault("xxxxxx/containerd://yyyyyy", "group-xxx") + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 1000: 1000000, + 1001: 1000000, + 1002: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSBE), + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 1000, + 1001, + 1002, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx": 1000000, + }, + }, + }, + { + name: "failed to clear cookie for LS container when not enabled before", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + 
helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{}, + groupToCookie: map[string]uint64{}, + }, + }, + { + name: "aborted to clear cookie for BE container since PID not found", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx", newCookieCacheEntry(1000000, 1000, 1001, 1002)) + p.groupCache.SetDefault("xxxxxx/containerd://yyyyyy", "group-xxx") + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 1000: 1000000, + 1001: 1000000, + 1002: 1000000, + 12344: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12344: true, + 12345: true, + 12346: true, + }), + groupID: "group-xxx", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSBE), + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 1000, + 1001, + 1002, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx": 1000000, + }, + }, + }, + { + name: "add cookie for LS container migrated between groups", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + 
helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(1000000) + p.cookieCache.SetDefault("group-yyy-expeller", newCookieCacheEntry(999999, 12344, 12345, 12346)) + p.groupCache.SetDefault("xxxxxx/containerd://yyyyyy", "group-yyy-expeller") + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12344: 999999, + 12345: 999999, + }, map[uint32]uint32{ + 1: 1, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := sysutil.NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + p := tt.fields.plugin + if tt.fields.cse != nil { + p.cse = tt.fields.cse + } + if tt.fields.preparePluginFn != nil { + tt.fields.preparePluginFn(p) + } + + gotErr := p.SetContainerCookie(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + for cookie, pids := range tt.wantFields.cookieToPIDs { + for _, pid := range pids { + if tt.fields.cse != nil { + got, gotErr := tt.fields.cse.Get(sysutil.CoreSchedScopeThread, pid) + assert.NoError(t, gotErr) + assert.Equal(t, cookie, got) + } + } + } + for groupID, cookieID := range tt.wantFields.groupToCookie { + if cookieID <= 0 { + _, ok := p.cookieCache.Get(tt.fields.groupID) + assert.False(t, ok, groupID) + continue + } + + entryIf, ok := p.cookieCache.Get(tt.fields.groupID) + assert.True(t, ok) + entry, ok := entryIf.(*CookieCacheEntry) + assert.True(t, ok) + assert.Equal(t, cookieID, entry.GetCookieID()) + assert.Equal(t, len(tt.wantFields.cookieToPIDs[cookieID]), len(entry.GetAllPIDs()), + "expect [%v] but got [%v]", tt.wantFields.cookieToPIDs[cookieID], entry.GetAllPIDs()) + for _, pid := range tt.wantFields.cookieToPIDs[cookieID] { + assert.True(t, entry.HasPID(pid)) + } + } + }) + } +} + +func TestPlugin_LoadAllCookies(t *testing.T) { + type fields struct { + prepareFn func(helper *sysutil.FileTestUtil) + plugin *Plugin + preparePluginFn func(p *Plugin) + cse sysutil.CoreSchedExtendedInterface + } + type wantFields struct { + cookieToPIDs map[uint64][]uint32 + groupToCookie map[string]uint64 + } + tests := []struct { + name string + fields fields + arg []*statesinformer.PodMeta + want bool + wantFields wantFields + }{ + { + name: "sync pods failed for no pod PID available", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + sandboxContainerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + 
helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "") + containerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + want: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{}, + groupToCookie: map[string]uint64{}, + }, + }, + { + name: "sync pods partially correct", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + sandboxContainerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, 
sysutil.CPUProcsV2, "12340\n") + containerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12345: true, + }), + }, + arg: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + want: true, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12346, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + { + name: "sync pods correctly for single pod", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + sandboxContainerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, 
sysutil.CPUProcsV2, "12340\n") + containerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + want: true, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + { + name: "sync pods correctly for multiple pods", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + // test-pod + sandboxContainerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + 
helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + containerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + // test-pod-2 + sandboxContainerCgroupDir1, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podcccccc.slice", "containerd://dddddd") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcs, "32760\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcsV2, "32760\n") + containerCgroupDir1, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podcccccc.slice", "containerd://zzzzzz") + helper.WriteCgroupFileContents(containerCgroupDir1, sysutil.CPUProcs, "32768\n32770\n32771\n") + helper.WriteCgroupFileContents(containerCgroupDir1, sysutil.CPUProcsV2, "32768\n32770\n32771\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + 32760: 1000000, + 32768: 1000000, + 32770: 1000000, + 32772: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + 32760: 32760, + 32768: 32768, + 32770: 32768, + 32772: 32768, + }, map[uint32]bool{ + 12346: true, + 32771: true, + }), + }, + arg: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: 
resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podcccccc.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-2", + UID: "cccccc", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-2", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSBurstable, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container-2", + ContainerID: "containerd://zzzzzz", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + }, + want: true, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + 32760, + 32768, + 32770, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + { + name: "sync pods correctly for multiple containers with inconsistent cookies", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + sandboxContainerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + containerCgroupDir, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + containerCgroupDir1, _ := util.GetContainerCgroupParentDirByID("kubepods.slice/kubepods-podxxxxxx.slice", "containerd://zzzzzz") + helper.WriteCgroupFileContents(containerCgroupDir1, sysutil.CPUProcs, "12350\n") + helper.WriteCgroupFileContents(containerCgroupDir1, sysutil.CPUProcsV2, "12350\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + 12350: 1100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + 12350: 12350, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + 
extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, { + Name: "test-container-1", + ContainerID: "containerd://zzzzzz", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + want: true, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := sysutil.NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + p := tt.fields.plugin + if tt.fields.cse != nil { + p.cse = tt.fields.cse + } + if tt.fields.preparePluginFn != nil { + tt.fields.preparePluginFn(p) + } + + got := p.LoadAllCookies(tt.arg) + assert.Equal(t, tt.want, got) + for groupID, cookieID := range tt.wantFields.groupToCookie { + if cookieID <= 0 { + _, ok := p.cookieCache.Get(groupID) + assert.False(t, ok, groupID) + continue + } + + entryIf, ok := p.cookieCache.Get(groupID) + assert.True(t, ok) + entry, ok := entryIf.(*CookieCacheEntry) + assert.True(t, ok) + assert.Equal(t, cookieID, entry.GetCookieID()) + assert.Equal(t, len(tt.wantFields.cookieToPIDs[cookieID]), len(entry.GetAllPIDs()), + "expect [%v] but got [%v]", tt.wantFields.cookieToPIDs[cookieID], entry.GetAllPIDs()) + for _, pid := range 
tt.wantFields.cookieToPIDs[cookieID] { + assert.True(t, entry.HasPID(pid), pid) + } + } + }) + } +} + +func TestPlugin_SetKubeQOSCPUIdle(t *testing.T) { + type fields struct { + rule *Rule + } + tests := []struct { + name string + fields fields + arg protocol.HooksProtocol + wantErr bool + wantField *protocol.KubeQOSContext + }{ + { + name: "nil context", + arg: (*protocol.KubeQOSContext)(nil), + wantErr: true, + }, + { + name: "rule not inited", + fields: fields{ + rule: newRule(), + }, + arg: &protocol.KubeQOSContext{ + Request: protocol.KubeQOSRequet{ + KubeQOSClass: corev1.PodQOSBurstable, + CgroupParent: "kubepods.slice/kubepods-burstable.slice", + }, + }, + wantErr: false, + wantField: &protocol.KubeQOSContext{ + Request: protocol.KubeQOSRequet{ + KubeQOSClass: corev1.PodQOSBurstable, + CgroupParent: "kubepods.slice/kubepods-burstable.slice", + }, + }, + }, + { + name: "cpu idle disabled", + fields: fields{ + rule: testGetDisabledRule(), + }, + arg: &protocol.KubeQOSContext{ + Request: protocol.KubeQOSRequet{ + KubeQOSClass: corev1.PodQOSBurstable, + CgroupParent: "kubepods.slice/kubepods-burstable.slice", + }, + }, + wantErr: false, + wantField: &protocol.KubeQOSContext{ + Request: protocol.KubeQOSRequet{ + KubeQOSClass: corev1.PodQOSBurstable, + CgroupParent: "kubepods.slice/kubepods-burstable.slice", + }, + Response: protocol.KubeQOSResponse{ + Resources: protocol.Resources{ + CPUIdle: pointer.Int64(0), + }, + }, + }, + }, + { + name: "cpu idle enabled", + fields: fields{ + rule: testGetAllEnabledRule(), + }, + arg: &protocol.KubeQOSContext{ + Request: protocol.KubeQOSRequet{ + KubeQOSClass: corev1.PodQOSBestEffort, + CgroupParent: "kubepods.slice/kubepods-besteffort.slice", + }, + }, + wantErr: false, + wantField: &protocol.KubeQOSContext{ + Request: protocol.KubeQOSRequet{ + KubeQOSClass: corev1.PodQOSBestEffort, + CgroupParent: "kubepods.slice/kubepods-besteffort.slice", + }, + Response: protocol.KubeQOSResponse{ + Resources: protocol.Resources{ + CPUIdle: pointer.Int64(1), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := newPlugin() + p.rule = tt.fields.rule + gotErr := p.SetKubeQOSCPUIdle(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + if !tt.wantErr { + assert.Equal(t, tt.wantField, tt.arg) + } + }) + } +} + +func testGetEnabledPlugin() *Plugin { + return &Plugin{ + rule: testGetEnabledRule(), + cookieCache: gocache.New(defaultCacheExpiration, defaultCacheDeleteInterval), + groupCache: gocache.New(defaultCacheExpiration, defaultCacheDeleteInterval), + reader: resourceexecutor.NewCgroupReader(), + executor: resourceexecutor.NewTestResourceExecutor(), + sysSupported: pointer.Bool(true), + allPodsSyncOnce: sync.Once{}, + initialized: atomic.NewBool(true), + } +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/helper.go b/pkg/koordlet/runtimehooks/hooks/coresched/helper.go new file mode 100644 index 000000000..041404555 --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/helper.go @@ -0,0 +1,394 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package coresched + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metrics" + "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol" + "github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer" + "github.com/koordinator-sh/koordinator/pkg/koordlet/util" + sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +type containerPID struct { + ContainerName string + ContainerID string + PID []uint32 +} + +// getCookie retrieves the last core sched cookies applied to the PIDs. +// If multiple cookies are set for the PIDs, only the first non-default cookie is picked. +// It returns the last cookie ID, PIDs synced and the error. +func (p *Plugin) getCookie(pids []uint32, groupID string) (uint64, []uint32, error) { + if len(pids) <= 0 { + klog.V(6).Infof("aborted to sync PIDs cookie for group %s, no PID", groupID) + return 0, nil, nil + } + newCookieIDMap := map[uint64]struct{}{} + firstNewCookieID := sysutil.DefaultCoreSchedCookieID + var newCookiePIDs []uint32 + for _, pid := range pids { + cookieID, err := p.cse.Get(sysutil.CoreSchedScopeThread, pid) + if err != nil { + klog.V(6).Infof("failed to sync last cookie for PID %v, group %s, err: %s", pid, groupID, err) + continue + } + if cookieID != sysutil.DefaultCoreSchedCookieID { + newCookieIDMap[cookieID] = struct{}{} + + if firstNewCookieID == sysutil.DefaultCoreSchedCookieID { + firstNewCookieID = cookieID + } + if cookieID == firstNewCookieID { + newCookiePIDs = append(newCookiePIDs, pid) + } + } + } + + if len(newCookieIDMap) <= 0 { // no cookie to sync, all PIDs are default or unknown + return 0, nil, nil + } + + if len(newCookieIDMap) == 1 { // only one cookie to sync + return firstNewCookieID, newCookiePIDs, nil + } + // else newCookieIDMap > 1 + + // When got more than one non-default cookie for given group, use the first synced new cookie ID. + // Let the PIDs of different cookies fixed by the next container-level reconciliation. + klog.V(4).Infof("unexpected number of cookies to sync, group %s, cookie ID %v, found cookie num %v, PID num %v", + groupID, firstNewCookieID, len(newCookieIDMap), len(pids)) + return firstNewCookieID, newCookiePIDs, nil +} + +// addCookie creates a new cookie for the given PIDs[0], and assign the cookie to PIDs[1:]. +// It returns the new cookie ID, the assigned PIDs, and the error. +// TODO: refactor to resource updater. 
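+// The flow below is: read the current cookie of PIDs[0] (only logged when it is not the default), create a
+// fresh cookie on PIDs[0] at thread-group scope, read back the newly created cookie ID, and then share that
+// cookie to PIDs[1:]; PIDs that fail the assignment are excluded from the returned PID list.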
+func (p *Plugin) addCookie(pids []uint32, groupID string) (uint64, []uint32, error) {
+	if len(pids) <= 0 {
+		klog.V(6).Infof("aborted to add PIDs cookie for group %s, no PID", groupID)
+		return 0, nil, nil
+	}
+	lastCookieID, err := p.cse.Get(sysutil.CoreSchedScopeThread, pids[0])
+	if err != nil {
+		return 0, nil, fmt.Errorf("get last cookie ID for PID %v failed, err: %s", pids[0], err)
+	}
+	if lastCookieID != sysutil.DefaultCoreSchedCookieID { // perhaps the group has changed
+		klog.V(5).Infof("last cookie ID for PID %v is not default, group %s, cookie expected %v but got %v",
+			pids[0], groupID, sysutil.DefaultCoreSchedCookieID, lastCookieID)
+	}
+
+	err = p.cse.Create(sysutil.CoreSchedScopeThreadGroup, pids[0])
+	if err != nil {
+		return 0, nil, fmt.Errorf("create cookie for PID %v failed, err: %s", pids[0], err)
+	}
+	cookieID, err := p.cse.Get(sysutil.CoreSchedScopeThread, pids[0])
+	if err != nil {
+		return 0, nil, fmt.Errorf("get new cookie ID for PID %v failed, err: %s", pids[0], err)
+	}
+
+	failedPIDs, err := p.cse.Assign(sysutil.CoreSchedScopeThread, pids[0], sysutil.CoreSchedScopeThreadGroup, pids[1:]...)
+	if err != nil {
+		klog.V(5).Infof("failed to assign new cookie for group %s, cookie %v, PID from %v, PID to %v failed of %v, err: %s",
+			groupID, cookieID, pids[0], len(failedPIDs), len(pids)-1, err)
+	}
+
+	pidsAdded := NewPIDCache(pids...)
+	pidsAdded.DeleteAny(failedPIDs...)
+
+	return cookieID, pidsAdded.GetAllSorted(), nil
+}
+
+// assignCookie assigns the target cookieID to the given PIDs.
+// It returns the PIDs assigned, the sibling PIDs to delete, and the error (when the error is non-nil, the
+// caller should fall back to adding a new cookie).
+// TODO: refactor to resource updater.
+func (p *Plugin) assignCookie(pids, siblingPIDs []uint32, groupID string, targetCookieID uint64) ([]uint32, []uint32, error) {
+	if len(pids) <= 0 {
+		klog.V(6).Infof("aborted to assign PIDs cookie for group %s, target cookie %v, no PID",
+			groupID, targetCookieID)
+		return nil, nil, nil
+	}
+	pidsToAssign := NewPIDCache()
+	var pidsAssigned []uint32
+	unknownCount := 0
+	for _, pid := range pids {
+		lastCookieID, err := p.cse.Get(sysutil.CoreSchedScopeThread, pid)
+		if err != nil {
+			klog.V(6).Infof("failed to get cookie for PID %v during assign, group %s, err: %s",
+				pid, groupID, err)
+			unknownCount++
+			continue
+		}
+		if lastCookieID != targetCookieID {
+			pidsToAssign.AddAny(pid)
+		} else {
+			pidsAssigned = append(pidsAssigned, pid)
+		}
+	}
+
+	if unknownCount >= len(pids) { // in case the given pids terminate, e.g.
the container is restarting, aborted + klog.V(5).Infof("failed to get last cookie for group %s, got %v unknown of %v PIDs", + groupID, unknownCount, len(pids)) + return nil, nil, nil + } + + if pidsToAssign.Len() <= 0 { // all PIDs are assigned, just refresh reference + return pidsAssigned, nil, nil + } + + var sPIDsToDelete []uint32 + validSiblingPID := uint32(0) // find one valid sibling PID to share from + for _, sPID := range siblingPIDs { + pCookieID, err := p.cse.Get(sysutil.CoreSchedScopeThread, sPID) + if err != nil { + klog.V(6).Infof("failed to get cookie for sibling PID %v, group %s, err: %s", + sPID, groupID, err) + sPIDsToDelete = append(sPIDsToDelete, sPID) + continue + } + if pCookieID != targetCookieID { + klog.V(6).Infof("failed to get target cookie for sibling PID %v, err: expect %v but got %v", + sPID, targetCookieID, pCookieID) + sPIDsToDelete = append(sPIDsToDelete, sPID) + continue + } + + // get the first valid sibling PID + validSiblingPID = sPID + break + } + + if validSiblingPID == 0 { + return nil, sPIDsToDelete, fmt.Errorf("no valid sibling PID, sibling PIDs to delete num %v", + len(sPIDsToDelete)) + } + + // assign to valid sibling PID + failedPIDs, err := p.cse.Assign(sysutil.CoreSchedScopeThread, validSiblingPID, sysutil.CoreSchedScopeThreadGroup, pidsToAssign.GetAllSorted()...) + if err != nil { + klog.V(5).Infof("failed to assign group cookie for group %s, target cookie %v, PID from %v, PID to %v failed of %v, err: %s", + groupID, targetCookieID, validSiblingPID, len(failedPIDs), pidsToAssign.Len(), err) + pidsToAssign.DeleteAny(failedPIDs...) + } + pidsToAssign.AddAny(pidsAssigned...) + + return pidsToAssign.GetAllSorted(), sPIDsToDelete, nil +} + +// clearCookie clears the cookie for the given PIDs to the default cookie 0. +// It returns the PIDs cleared. +func (p *Plugin) clearCookie(pids []uint32, groupID string, lastCookieID uint64) []uint32 { + if len(pids) <= 0 { + klog.V(6).Infof("aborted to clear PIDs cookie for group %s, no PID", groupID) + return nil + } + pidsToClear := NewPIDCache() + var pidsCleared []uint32 + for _, pid := range pids { + pCookieID, err := p.cse.Get(sysutil.CoreSchedScopeThread, pid) + if err != nil { + klog.V(6).Infof("failed to get cookie for PID %v, group %s, err: %s", pid, groupID, err) + continue + } + if pCookieID != sysutil.DefaultCoreSchedCookieID { + pidsToClear.AddAny(pid) + } else { + pidsCleared = append(pidsCleared, pid) + } + } + + if pidsToClear.Len() <= 0 { + return pidsCleared + } + + failedPIDs, err := p.cse.Clear(sysutil.CoreSchedScopeThreadGroup, pidsToClear.GetAllSorted()...) + if err != nil { + klog.V(4).Infof("failed to clear cookie for group, last cookie %v, PID %v failed of %v, total %v, err: %s", + groupID, lastCookieID, len(failedPIDs), pidsToClear.GetAllSorted(), len(pids), err) + pidsToClear.DeleteAny(failedPIDs...) + } + pidsToClear.AddAny(pidsCleared...) + + return pidsToClear.GetAllSorted() +} + +// getPodEnabledAndGroup gets whether the pod enables the core scheduling and the group ID if it does. 
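+// The resolution order is: a pod labeled with the none group ID "0" is disabled; a pod labeled with any other
+// group ID is enabled and uses that group; an unlabeled pod follows the node-level QoS rule, falling back to a
+// per-pod group derived from the pod UID. When the resolved QoS params enable the expeller mode, the group ID is
+// suffixed with ExpellerGroupSuffix, e.g. the label "group-xxx" resolves to the group "group-xxx-expeller".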
+func (p *Plugin) getPodEnabledAndGroup(podAnnotations, podLabels map[string]string, podKubeQOS corev1.PodQOSClass, podUID string) (bool, string) {
+    // if the pod enables/disables the core-sched explicitly
+    groupID, isPodDisabled := slov1alpha1.GetCoreSchedGroupID(podLabels)
+    if isPodDisabled != nil && *isPodDisabled { // pod disables
+        return false, groupID
+    }
+
+    podQOS := extension.QoSNone
+    if podLabels != nil {
+        podQOS = extension.GetQoSClassByAttrs(podLabels, podAnnotations)
+    }
+    isQOSEnabled, isExpeller := p.rule.IsPodEnabled(podQOS, podKubeQOS)
+    groupID = p.getGroupID(groupID, podUID, isExpeller)
+
+    if isPodDisabled != nil { // assert *isPodDisabled == false
+        return true, groupID
+    }
+
+    // use the QoS-level rules
+    return isQOSEnabled, groupID
+}
+
+func (p *Plugin) getGroupID(baseGroupID string, podUID string, isExpeller bool) string {
+    var groupID string
+    if len(baseGroupID) > 0 {
+        groupID = baseGroupID
+    } else {
+        groupID = podUID
+    }
+    if isExpeller {
+        groupID += ExpellerGroupSuffix
+    }
+    return groupID
+}
+
+func (p *Plugin) getContainerUID(podUID string, containerID string) string {
+    return podUID + "/" + containerID
+}
+
+func (p *Plugin) getContainerPIDs(containerCgroupParent string) ([]uint32, error) {
+    pids, err := p.reader.ReadCPUProcs(containerCgroupParent)
+    if err != nil && resourceexecutor.IsCgroupDirErr(err) {
+        klog.V(5).Infof("aborted to get PIDs for container dir %s, err: %s",
+            containerCgroupParent, err)
+        return nil, nil
+    }
+    if err != nil {
+        return nil, fmt.Errorf("get container PIDs failed, err: %w", err)
+    }
+    return pids, nil
+}
+
+func (p *Plugin) getSandboxContainerPIDs(podMeta *statesinformer.PodMeta) ([]uint32, string, error) {
+    sandboxID, err := util.GetPodSandboxContainerID(podMeta.Pod)
+    if err != nil {
+        return nil, "", fmt.Errorf("get sandbox container ID failed, err: %w", err)
+    }
+    sandboxContainerDir, err := util.GetContainerCgroupParentDirByID(podMeta.CgroupDir, sandboxID)
+    if err != nil {
+        return nil, sandboxID, fmt.Errorf("get cgroup parent for sandbox container %s/%s, err: %w",
+            podMeta.Key(), sandboxID, err)
+    }
+    pids, err := p.getContainerPIDs(sandboxContainerDir)
+    if err != nil {
+        return nil, sandboxID, fmt.Errorf("get PID failed for sandbox container %s/%s, parent dir %s, err: %w",
+            podMeta.Key(), sandboxID, sandboxContainerDir, err)
+    }
+    return pids, sandboxID, nil
+}
+
+func (p *Plugin) getNormalContainerPIDs(podMeta *statesinformer.PodMeta, containerStatus *corev1.ContainerStatus) ([]uint32, error) {
+    var pids []uint32
+    containerDir, err := util.GetContainerCgroupParentDir(podMeta.CgroupDir, containerStatus)
+    if err != nil {
+        return nil, fmt.Errorf("get cgroup parent for container %s/%s, err: %w",
+            podMeta.Key(), containerStatus.Name, err)
+    }
+    pids, err = p.getContainerPIDs(containerDir)
+    if err != nil {
+        return nil, fmt.Errorf("get PID failed for container %s/%s, parent dir %s, err: %w",
+            podMeta.Key(), containerStatus.Name, containerDir, err)
+    }
+    return pids, nil
+}
+
+func (p *Plugin) getAllContainerPIDs(podMeta *statesinformer.PodMeta) []*containerPID {
+    var containerToPIDs []*containerPID
+    count := 0
+    pod := podMeta.Pod
+
+    // for sandbox container
+    sandboxPIDs, sandboxContainerID, err := p.getSandboxContainerPIDs(podMeta)
+    if err != nil {
+        klog.V(5).Infof("failed to get sandbox container PID for pod %s, err: %s", podMeta.Key(), err)
+    } else {
+        containerToPIDs = append(containerToPIDs, &containerPID{
+            ContainerID: sandboxContainerID,
+            PID:         sandboxPIDs,
+        })
+        count +=
len(sandboxPIDs) + } + + // for containers + containerMap := make(map[string]*corev1.Container, len(pod.Spec.Containers)) + for i := range pod.Spec.Containers { + container := &pod.Spec.Containers[i] + containerMap[container.Name] = container + } + for i := range pod.Status.ContainerStatuses { + containerStat := &pod.Status.ContainerStatuses[i] + if containerStat.State.Running == nil || len(containerStat.ContainerID) <= 0 { + klog.V(6).Infof("skip sync core sched cookie for non-running container %s/%s, ID %s, state %+v", + podMeta.Key(), containerStat.Name, containerStat.ContainerID, containerStat.State) + continue + } + + container, exist := containerMap[containerStat.Name] + if !exist { + klog.V(5).Infof("failed to find container %s/%s during sync core sched cookie", + podMeta.Key(), containerStat.Name) + continue + } + + containerPIDs, err := p.getNormalContainerPIDs(podMeta, containerStat) + if err != nil { + klog.V(5).Infof("failed to get container %s PID for pod %s, err: %s", + container.Name, podMeta.Key(), err) + continue + } + + containerToPIDs = append(containerToPIDs, &containerPID{ + ContainerName: containerStat.Name, + ContainerID: containerStat.ContainerID, + PID: containerPIDs, + }) + count += len(containerPIDs) + } + + klog.V(6).Infof("get PIDs for pod %s finished, sandbox and container num %v, PID num %v", + podMeta.Key(), len(containerToPIDs), count) + return containerToPIDs +} + +func recordContainerCookieMetrics(containerCtx *protocol.ContainerContext, groupID string, cookieID uint64) { + metrics.RecordContainerCoreSchedCookie(containerCtx.Request.PodMeta.Namespace, + containerCtx.Request.PodMeta.Name, containerCtx.Request.PodMeta.UID, + containerCtx.Request.ContainerMeta.Name, containerCtx.Request.ContainerMeta.ID, + groupID, cookieID) + metrics.RecordCoreSchedCookieManageStatus(groupID, true) +} + +func resetContainerCookieMetrics(containerCtx *protocol.ContainerContext, groupID string, lastCookieID uint64) { + metrics.ResetContainerCoreSchedCookie(containerCtx.Request.PodMeta.Namespace, + containerCtx.Request.PodMeta.Name, containerCtx.Request.PodMeta.UID, + containerCtx.Request.ContainerMeta.Name, containerCtx.Request.ContainerMeta.ID, + groupID, lastCookieID) +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go new file mode 100644 index 000000000..ac825158a --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go @@ -0,0 +1,1006 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package coresched + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + + "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" + "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks" + sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" +) + +func Test_getCookie(t *testing.T) { + type fields struct { + coreSchedExtended sysutil.CoreSchedExtendedInterface + } + type args struct { + pids []uint32 + groupID string + } + tests := []struct { + name string + fields fields + args args + want uint64 + want1 []uint32 + wantErr bool + }{ + { + name: "no pid to sync", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + }, map[uint32]uint32{ + 1: 1, + }, map[uint32]bool{}), + }, + args: args{ + pids: nil, + }, + want: 0, + want1: nil, + wantErr: false, + }, + { + name: "sync default cookie", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 0, + 1001: 0, + 1002: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 0, + 1001: 0, + 1002: 0, + }, map[uint32]bool{ + 1004: true, + }), + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + 1003, + 1004, + }, + }, + want: 0, + want1: nil, + wantErr: false, + }, + { + name: "sync for multiple cookies and use the first new cookie", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + 1002: 200000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1004: true, + }), + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + 1003, + 1004, + }, + }, + want: 100000, + want1: []uint32{ + 1000, + 1001, + }, + wantErr: false, + }, + { + name: "all pids get failed", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1000: true, + 1001: true, + 1002: true, + 1004: true, + }), + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + }, + }, + want: 0, + want1: nil, + wantErr: false, + }, + { + name: "sync cookie correctly", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + 1002: 100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1004: true, + }), + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + 1003, + 1004, + }, + }, + want: 100000, + want1: []uint32{ + 1000, + 1001, + 1002, + }, + wantErr: false, + }, + { + name: "sync cookie correctly 1", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + 1002: 100000, + 1010: 100000, + 2000: 200000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + 1010: 1010, + }, map[uint32]bool{ + 1001: true, + 1004: true, + }), + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + 1003, + 1004, + 1010, + }, + }, + want: 100000, + want1: []uint32{ + 1000, + 1002, + 1010, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := newPlugin() + p.cse = tt.fields.coreSchedExtended + got, got1, gotErr := p.getCookie(tt.args.pids, 
tt.args.groupID) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.want1, got1) + }) + } +} + +func Test_addCookie(t *testing.T) { + type fields struct { + coreSchedExtended sysutil.CoreSchedExtendedInterface + nextCookieID uint64 + } + type args struct { + pids []uint32 + groupID string + } + tests := []struct { + name string + fields fields + args args + want uint64 + want1 []uint32 + wantErr bool + }{ + { + name: "no pid to add", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 2: 0, + }, map[uint32]uint32{ + 1: 1, + 2: 2, + }, map[uint32]bool{}), + }, + args: args{ + pids: nil, + }, + want: 0, + want1: nil, + wantErr: false, + }, + { + name: "add cookie for pids with non-default cookie", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 2: 0, + 1000: 100000, + 1001: 100000, + }, map[uint32]uint32{ + 1: 1, + 2: 2, + 1000: 1000, + 1001: 1001, + }, map[uint32]bool{}), + nextCookieID: 100000, + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + }, + }, + want: 100000, + want1: []uint32{ + 1000, + 1001, + }, + wantErr: false, + }, + { + name: "failed to add cookie for beginning pid", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 2: 0, + 1000: 0, + 1001: 0, + }, map[uint32]uint32{ + 1: 1, + 2: 2, + 1000: 1000, + 1001: 1001, + }, map[uint32]bool{ + 1000: true, + 1002: true, + }), + nextCookieID: 100000, + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + }, + }, + want: 0, + want1: nil, + wantErr: true, + }, + { + name: "add cookie correctly", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 2: 0, + 1000: 0, + 1001: 0, + }, map[uint32]uint32{ + 1: 1, + 2: 2, + 1000: 1000, + 1001: 1001, + }, map[uint32]bool{ + 1002: true, + }), + nextCookieID: 100000, + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + }, + }, + want: 100000, + want1: []uint32{ + 1000, + 1001, + }, + wantErr: false, + }, + { + name: "add cookie correctly 2", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 2: 0, + 1000: 0, + 1001: 0, + 1002: 0, + 1010: 100000, + }, map[uint32]uint32{ + 1: 1, + 2: 2, + 1000: 1000, + 1001: 1001, + 1002: 1002, + 1010: 1010, + }, map[uint32]bool{ + 1002: true, + }), + nextCookieID: 200000, + }, + args: args{ + pids: []uint32{ + 1000, + 1001, + 1002, + 1010, + }, + }, + want: 200000, + want1: []uint32{ + 1000, + 1001, + 1010, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := newPlugin() + curPID := uint32(2) + p.cse = tt.fields.coreSchedExtended + f := tt.fields.coreSchedExtended.(*sysutil.FakeCoreSchedExtended) + f.SetCurPID(curPID) + f.SetNextCookieID(tt.fields.nextCookieID) + got, got1, gotErr := p.addCookie(tt.args.pids, tt.args.groupID) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.want1, got1) + got, gotErr = f.Get(sysutil.CoreSchedScopeThread, curPID) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + }) + } +} + +func Test_assignCookie(t *testing.T) { + type fields struct { + coreSchedExtended sysutil.CoreSchedExtendedInterface + } + type args struct { + pids []uint32 + siblingPIDs []uint32 + groupID string + targetCookieID uint64 + } + tests := []struct { + name string + fields fields + args args + want []uint32 + want1 
[]uint32 + wantErr bool + }{ + { + name: "no pid to assign", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{}, + map[uint32]uint32{}, + map[uint32]bool{}), + }, + args: args{ + groupID: "1", + targetCookieID: 100000, + }, + want: nil, + want1: nil, + wantErr: false, + }, + { + name: "all pid unknown", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1000: true, + 1001: true, + 1002: true, + }), + }, + args: args{ + pids: []uint32{ + 1001, + 1002, + }, + siblingPIDs: []uint32{ + 1000, + }, + groupID: "1", + targetCookieID: 100000, + }, + want: nil, + want1: nil, + wantErr: false, + }, + { + name: "no valid sibling pid to share", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1000: true, + }), + }, + args: args{ + pids: []uint32{ + 1001, + 1002, + }, + siblingPIDs: []uint32{ + 1000, + }, + groupID: "1", + targetCookieID: 100000, + }, + want: nil, + want1: []uint32{ + 1000, + }, + wantErr: true, + }, + { + name: "assign pid successfully", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1002: true, + }), + }, + args: args{ + pids: []uint32{ + 1001, + 1002, + }, + siblingPIDs: []uint32{ + 1000, + }, + groupID: "1", + targetCookieID: 100000, + }, + want: []uint32{ + 1001, + }, + want1: nil, + wantErr: false, + }, + { + name: "assign pid successfully 1", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 0, + 1002: 0, + 1003: 0, + 1010: 100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + 1003: 1003, + 1010: 1010, + }, map[uint32]bool{ + 1002: true, + }), + }, + args: args{ + pids: []uint32{ + 1001, + 1002, + 1003, + 1010, + }, + siblingPIDs: []uint32{ + 1000, + }, + groupID: "1", + targetCookieID: 100000, + }, + want: []uint32{ + 1001, + 1003, + 1010, + }, + want1: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := newPlugin() + p.cse = tt.fields.coreSchedExtended + got, got1, gotErr := p.assignCookie(tt.args.pids, tt.args.siblingPIDs, tt.args.groupID, tt.args.targetCookieID) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.want1, got1) + }) + } +} + +func Test_clearCookie(t *testing.T) { + type fields struct { + coreSchedExtended sysutil.CoreSchedExtendedInterface + } + type args struct { + pids []uint32 + groupID string + lastCookieID uint64 + } + tests := []struct { + name string + fields fields + args args + want []uint32 + }{ + { + name: "no pid to clear", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{}, + map[uint32]uint32{}, + map[uint32]bool{}), + }, + args: args{ + groupID: "1", + lastCookieID: 100000, + }, + want: nil, + }, + { + name: "all pid unknown", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, 
map[uint32]bool{ + 1000: true, + 1001: true, + 1002: true, + }), + }, + args: args{ + pids: []uint32{ + 1001, + 1002, + }, + groupID: "1", + lastCookieID: 100000, + }, + want: nil, + }, + { + name: "clear pid correctly", + fields: fields{ + coreSchedExtended: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 1000: 100000, + 1001: 100000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1002, + }, map[uint32]bool{ + 1002: true, + }), + }, + args: args{ + pids: []uint32{ + 1001, + 1002, + }, + groupID: "1", + lastCookieID: 100000, + }, + want: []uint32{ + 1001, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := newPlugin() + p.cse = tt.fields.coreSchedExtended + got := p.clearCookie(tt.args.pids, tt.args.groupID, tt.args.lastCookieID) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isPodEnabled(t *testing.T) { + type field struct { + rule *Rule + } + type args struct { + podAnnotations map[string]string + podLabels map[string]string + podKubeQOS corev1.PodQOSClass + podUID string + } + tests := []struct { + name string + field field + args args + want bool + want1 string + }{ + { + name: "pod enabled on annotation", + field: field{ + rule: testGetEnabledRule(), + }, + args: args{ + podAnnotations: map[string]string{}, + podLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + podUID: "xxx", + }, + want: true, + want1: "group-xxx-expeller", + }, + { + name: "pod enabled on annotation 1", + field: field{ + rule: testGetDisabledRule(), + }, + args: args{ + podAnnotations: map[string]string{}, + podLabels: map[string]string{ + slov1alpha1.LabelCoreSchedGroupID: "", + }, + podUID: "xxx", + }, + want: true, + want1: "xxx", + }, + { + name: "pod disabled on annotation", + field: field{ + rule: testGetEnabledRule(), + }, + args: args{ + podAnnotations: map[string]string{}, + podLabels: map[string]string{ + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + podUID: "xxx", + }, + want: false, + want1: slov1alpha1.CoreSchedGroupIDNone, + }, + { + name: "pod enabled according to nodeSLO", + field: field{ + rule: testGetEnabledRule(), + }, + args: args{ + podKubeQOS: corev1.PodQOSBurstable, + podUID: "xxx", + }, + want: true, + want1: "xxx-expeller", + }, + { + name: "pod enabled according to nodeSLO 1", + field: field{ + rule: testGetEnabledRule(), + }, + args: args{ + podLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + }, + podAnnotations: map[string]string{}, + podKubeQOS: corev1.PodQOSGuaranteed, + podUID: "xxx", + }, + want: true, + want1: "xxx-expeller", + }, + { + name: "pod enabled according to nodeSLO 2", + field: field{ + rule: &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: testGetDisabledRuleParam(), + extension.QoSLSR: testGetDisabledRuleParam(), + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + }, + }, + args: args{ + podLabels: map[string]string{ + extension.LabelPodQoS: 
string(extension.QoSLS), + }, + podAnnotations: map[string]string{}, + podKubeQOS: corev1.PodQOSBurstable, + podUID: "xxx", + }, + want: true, + want1: "xxx", + }, + { + name: "pod disabled according to nodeSLO", + field: field{ + rule: testGetDisabledRule(), + }, + args: args{ + podKubeQOS: corev1.PodQOSBestEffort, + podUID: "xxx", + }, + want: false, + want1: "xxx", + }, + { + name: "pod disabled according to nodeSLO 1", + field: field{ + rule: &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: testGetDisabledRuleParam(), + extension.QoSLSR: testGetDisabledRuleParam(), + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + }, + }, + args: args{ + podLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + }, + podAnnotations: map[string]string{}, + podKubeQOS: corev1.PodQOSGuaranteed, + podUID: "xxx", + }, + want: false, + want1: "xxx", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := &Plugin{ + rule: tt.field.rule, + } + got, got1 := p.getPodEnabledAndGroup(tt.args.podAnnotations, tt.args.podLabels, tt.args.podKubeQOS, tt.args.podUID) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.want1, got1) + }) + } +} + +func Test_getContainerPIDs(t *testing.T) { + type fields struct { + prepareFn func(helper *sysutil.FileTestUtil) + useCgroupV2 bool + } + tests := []struct { + name string + fields fields + arg string + want []uint32 + wantErr bool + }{ + { + name: "get container PIDs correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods/podxxxxxx/yyyyyy", sysutil.CPUProcs, "12344\n12345\n") + }, + useCgroupV2: false, + }, + arg: "kubepods/podxxxxxx/yyyyyy", + want: []uint32{ + 12344, + 12345, + }, + wantErr: false, + }, + { + name: "aborted to get PIDs when cgroup dir not exist", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) {}, + useCgroupV2: false, + }, + arg: "kubepods/podxxxxxx/yyyyyy", + want: nil, + wantErr: false, + }, + { + name: "consider container pids as PIDs when PIDs not exist", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods/podxxxxxx/yyyyyy", sysutil.CPUProcs, "12344\n12345\n12350\n") + }, + }, + arg: "kubepods/podxxxxxx/yyyyyy", + want: []uint32{ + 12344, + 12345, + 12350, + }, + wantErr: false, + }, + { + name: "get container PIDs correctly on cgroup-v2", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteCgroupFileContents("kubepods/podxxxxxx/yyyyyy", sysutil.CPUProcsV2, "12344\n12345\n12350\n") + }, + useCgroupV2: true, + }, + arg: "kubepods/podxxxxxx/yyyyyy", + want: []uint32{ + 12344, + 12345, + 12350, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := sysutil.NewFileTestUtil(t) + defer helper.Cleanup() + helper.SetCgroupsV2(tt.fields.useCgroupV2) + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + + p := newPlugin() + p.Setup(hooks.Options{ + Reader: 
resourceexecutor.NewCgroupReader(), + }) + got, gotErr := p.getContainerPIDs(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/rule.go b/pkg/koordlet/runtimehooks/hooks/coresched/rule.go new file mode 100644 index 000000000..8296c39cc --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/rule.go @@ -0,0 +1,251 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package coresched + +import ( + "fmt" + "reflect" + "sort" + "sync" + + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol" + "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/reconciler" + "github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer" +) + +type Param struct { + IsPodEnabled bool + IsExpeller bool + IsCPUIdle bool +} + +func newParam(qosCfg *slov1alpha1.CPUQOSCfg, policy slov1alpha1.CPUQOSPolicy) Param { + isPolicyCoreSched := policy == slov1alpha1.CPUQOSPolicyCoreSched + return Param{ + IsPodEnabled: isPolicyCoreSched && *qosCfg.Enable, + IsExpeller: isPolicyCoreSched && *qosCfg.CoreExpeller, + IsCPUIdle: isPolicyCoreSched && *qosCfg.SchedIdle == 1, + } +} + +type Rule struct { + lock sync.RWMutex + podQOSParams map[extension.QoSClass]Param + kubeQOSPodParams map[corev1.PodQOSClass]Param +} + +func newRule() *Rule { + return &Rule{ + podQOSParams: make(map[extension.QoSClass]Param), + kubeQOSPodParams: make(map[corev1.PodQOSClass]Param), + } +} + +func (r *Rule) IsInited() bool { + r.lock.RLock() + defer r.lock.RUnlock() + return len(r.podQOSParams) > 0 && len(r.kubeQOSPodParams) > 0 +} + +// IsPodEnabled returns if the pod's core sched is enabled by the rule, and if the QoS-level core expeller is enabled. 
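+// The koordinator QoS class is matched first; when it has no rule, the pod's kube QoS class is used as the fallback,
+// and the core sched is treated as disabled if neither has a rule.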
+func (r *Rule) IsPodEnabled(podQoSClass extension.QoSClass, podKubeQOS corev1.PodQOSClass) (bool, bool) { + r.lock.RLock() + defer r.lock.RUnlock() + if val, exist := r.podQOSParams[podQoSClass]; exist { + return val.IsPodEnabled, val.IsExpeller + } + if val, exist := r.kubeQOSPodParams[podKubeQOS]; exist { + return val.IsPodEnabled, val.IsExpeller + } + // core sched is not needed for all types of pods, so it should be disabled by default + return false, false +} + +func (r *Rule) IsKubeQOSCPUIdle(KubeQOS corev1.PodQOSClass) bool { + r.lock.RLock() + defer r.lock.RUnlock() + if val, exist := r.kubeQOSPodParams[KubeQOS]; exist { + return val.IsCPUIdle + } + // cpu idle disabled by default + return false +} + +func (r *Rule) Update(ruleNew *Rule) bool { + r.lock.Lock() + defer r.lock.Unlock() + isEqual := reflect.DeepEqual(r.podQOSParams, ruleNew.podQOSParams) && + reflect.DeepEqual(r.kubeQOSPodParams, ruleNew.kubeQOSPodParams) + if isEqual { + return false + } + r.podQOSParams = ruleNew.podQOSParams + r.kubeQOSPodParams = ruleNew.kubeQOSPodParams + return true +} + +func (p *Plugin) parseRuleForNodeSLO(mergedNodeSLOIf interface{}) (bool, error) { + mergedNodeSLO := mergedNodeSLOIf.(*slov1alpha1.NodeSLOSpec) + qosStrategy := mergedNodeSLO.ResourceQOSStrategy + + // default policy disables + cpuPolicy := slov1alpha1.CPUQOSPolicy("") + if qosStrategy.Policies != nil && qosStrategy.Policies.CPUPolicy != nil { + cpuPolicy = *qosStrategy.Policies.CPUPolicy + } + lsrQOS := qosStrategy.LSRClass.CPUQOS + lsQOS := qosStrategy.LSClass.CPUQOS + beQOS := qosStrategy.BEClass.CPUQOS + + // setting pod rule by qos config + lsrValue := newParam(lsrQOS, cpuPolicy) + lsValue := newParam(lsQOS, cpuPolicy) + beValue := newParam(beQOS, cpuPolicy) + // setting guaranteed pod enabled if LS or LSR enabled + guaranteedPodVal := lsValue + if lsrValue.IsPodEnabled { + guaranteedPodVal = lsrValue + } + + ruleNew := &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: lsrValue, + extension.QoSLSR: lsrValue, + extension.QoSLS: lsValue, + extension.QoSBE: beValue, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: guaranteedPodVal, + corev1.PodQOSBurstable: lsValue, + corev1.PodQOSBestEffort: beValue, + }, + } + + updated := p.rule.Update(ruleNew) + if updated { + klog.V(4).Infof("runtime hook plugin %s parse rule %v, update new rule %+v", name, updated, ruleNew) + } else { + klog.V(6).Infof("runtime hook plugin %s parse rule unchanged, rule %+v", name, ruleNew) + } + return updated, nil +} + +func (p *Plugin) parseForAllPods(e interface{}) (bool, error) { + _, ok := e.(*struct{}) + if !ok { + return false, fmt.Errorf("invalid rule type %T", e) + } + + needSync := false + p.allPodsSyncOnce.Do(func() { + needSync = true + klog.V(5).Infof("plugin %s callback the first all pods update", name) + }) + return needSync, nil +} + +func (p *Plugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error { + if target == nil { + return fmt.Errorf("callback target is nil") + } + if !p.rule.IsInited() { + klog.V(4).Infof("plugin %s skipped for rule not initialized", name) + return nil + } + + // TBD: try to enable the kernel feature if needed + if supported, msg := p.SystemSupported(); !supported { + klog.V(4).Infof("plugin %s is not supported by system, msg: %s", name, msg) + return nil + } + + podMetas := target.Pods + if len(podMetas) <= 0 { + klog.V(5).Infof("plugin %s skipped for rule update, no pod passed from callback", name) + return nil + } + + if !p.InitCache(podMetas) 
{ + klog.V(4).Infof("plugin %s aborted for cookie cache has not been initialized", name) + return nil + } + + return p.refreshForAllPods(podMetas) +} + +func (p *Plugin) refreshForAllPods(podMetas []*statesinformer.PodMeta) error { + for _, kubeQOS := range []corev1.PodQOSClass{ + corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} { + kubeQOSCtx := &protocol.KubeQOSContext{} + kubeQOSCtx.FromReconciler(kubeQOS) + + if err := p.SetKubeQOSCPUIdle(kubeQOSCtx); err != nil { + klog.V(4).Infof("callback %s set cpu idle for kube qos %s failed, err: %v", name, kubeQOS, err) + } else { + kubeQOSCtx.ReconcilerDone(p.executor) + klog.V(5).Infof("callback %s set cpu idle for kube qos %s finished", name, kubeQOS) + } + } + + sort.Slice(podMetas, func(i, j int) bool { + if podMetas[i].Pod == nil || podMetas[j].Pod == nil { + return podMetas[j].Pod == nil + } + return podMetas[i].Pod.CreationTimestamp.Before(&podMetas[j].Pod.CreationTimestamp) + }) + + filter := reconciler.PodQOSFilter() + for _, podMeta := range podMetas { + if podMeta.Pod == nil { + continue + } + if qos := extension.QoSClass(filter.Filter(podMeta)); qos == extension.QoSSystem { + klog.V(6).Infof("skip refresh core sched cookie for pod %s whose QoS is SYSTEM", podMeta.Key()) + continue + } + + // sandbox-container-level + sandboxContainerCtx := &protocol.ContainerContext{} + sandboxContainerCtx.FromReconciler(podMeta, "", true) + if err := p.SetContainerCookie(sandboxContainerCtx); err != nil { + klog.Warningf("failed to set core sched cookie for pod sandbox %v, err: %s", podMeta.Key(), err) + } else { + klog.V(5).Infof("set core sched cookie for pod sandbox %v finished", podMeta.Key()) + } + + // container-level + for _, containerStat := range podMeta.Pod.Status.ContainerStatuses { + containerCtx := &protocol.ContainerContext{} + containerCtx.FromReconciler(podMeta, containerStat.Name, false) + if err := p.SetContainerCookie(containerCtx); err != nil { + klog.Warningf("failed to set core sched cookie for container %s/%s, err: %s", + podMeta.Key(), containerStat.Name, err) + continue + } else { + klog.V(5).Infof("set core sched cookie for container %s/%s finished", + podMeta.Key(), containerStat.Name) + } + } + } + + return nil +} diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go new file mode 100644 index 000000000..a7a1e9734 --- /dev/null +++ b/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go @@ -0,0 +1,1752 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package coresched + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" + + "github.com/koordinator-sh/koordinator/apis/extension" + slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" + "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" + "github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer" + "github.com/koordinator-sh/koordinator/pkg/koordlet/util" + sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system" + "github.com/koordinator-sh/koordinator/pkg/util/sloconfig" +) + +func TestRule(t *testing.T) { + t.Run("test", func(t *testing.T) { + r := newRule() + assert.NotNil(t, r) + assert.False(t, r.IsInited()) + + ruleNew := testGetDisabledRule() + got := r.Update(ruleNew) + assert.True(t, true, got) + assert.True(t, r.IsInited()) + got, got1 := r.IsPodEnabled(extension.QoSLS, corev1.PodQOSGuaranteed) + assert.False(t, got) + assert.False(t, got1) + got2 := r.IsKubeQOSCPUIdle(corev1.PodQOSBurstable) + assert.False(t, got2) + got, got1 = r.IsPodEnabled(extension.QoSNone, corev1.PodQOSBestEffort) + assert.False(t, got) + assert.False(t, got1) + + got = r.Update(ruleNew) + assert.False(t, false, got) + got, got1 = r.IsPodEnabled(extension.QoSBE, corev1.PodQOSBestEffort) + assert.False(t, got) + assert.False(t, got1) + got2 = r.IsKubeQOSCPUIdle(corev1.PodQOSBestEffort) + assert.False(t, got2) + + ruleNew = testGetEnabledRule() + got = r.Update(ruleNew) + assert.True(t, true, got) + got, got1 = r.IsPodEnabled(extension.QoSLS, corev1.PodQOSGuaranteed) + assert.True(t, got) + assert.True(t, got1) + got, got1 = r.IsPodEnabled(extension.QoSNone, corev1.PodQOSBurstable) + assert.True(t, got) + assert.True(t, got1) + got, got1 = r.IsPodEnabled(extension.QoSBE, corev1.PodQOSBestEffort) + assert.True(t, got) + assert.False(t, got1) + got2 = r.IsKubeQOSCPUIdle(corev1.PodQOSBurstable) + assert.False(t, got2) + + // enable CPU idle for BE + beParam := Param{ + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + } + ruleNew.podQOSParams[extension.QoSBE] = beParam + ruleNew.kubeQOSPodParams[corev1.PodQOSBestEffort] = beParam + got2 = r.IsKubeQOSCPUIdle(corev1.PodQOSBestEffort) + assert.True(t, got2) + }) +} + +func Test_parseRuleForNodeSLO(t *testing.T) { + type field struct { + rule *Rule + } + tests := []struct { + name string + field field + arg interface{} + want bool + wantErr bool + wantField *Rule + }{ + { + name: "keep disabled", + field: field{ + rule: testGetDisabledRule(), + }, + arg: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: sloconfig.NoneResourceQOSPolicies(), + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + }, + }, + want: false, + wantErr: false, + wantField: testGetDisabledRule(), + }, + { + name: "keep enabled", + field: field{ + rule: testGetEnabledRule(), + }, + arg: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: testGetEnabledResourceQOSPolicies(), + LSRClass: 
&slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSLSR), + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSLS), + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSBE), + }, + }, + }, + }, + want: false, + wantErr: false, + wantField: testGetEnabledRule(), + }, + { + name: "policy disabled", + field: field{ + rule: testGetEnabledRule(), + }, + arg: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: sloconfig.NoneResourceQOSPolicies(), + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + }, + }, + want: true, + wantErr: false, + wantField: testGetDisabledRule(), + }, + { + name: "policy enabled", + field: field{ + rule: testGetDisabledRule(), + }, + arg: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: testGetEnabledResourceQOSPolicies(), + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSLSR), + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSLS), + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSBE), + }, + }, + }, + }, + want: true, + wantErr: false, + wantField: testGetEnabledRule(), + }, + { + name: "enabled on LS and BE", + field: field{ + rule: testGetDisabledRule(), + }, + arg: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: testGetEnabledResourceQOSPolicies(), + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: *sloconfig.NoneCPUQOS(), + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + SchedIdle: pointer.Int64(0), + CoreExpeller: pointer.Bool(true), + }, + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(-1), + SchedIdle: pointer.Int64(1), + CoreExpeller: pointer.Bool(false), + }, + }, + }, + }, + }, + want: true, + wantErr: false, + wantField: &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: { + IsPodEnabled: false, + IsExpeller: false, + IsCPUIdle: false, + }, + extension.QoSLSR: { + IsPodEnabled: false, + IsExpeller: false, + IsCPUIdle: false, + }, + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + 
IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + }, + }, + { + name: "policy enabled on BE", + field: field{ + rule: &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLSR: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: false, + IsExpeller: false, + IsCPUIdle: false, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: false, + IsExpeller: false, + IsCPUIdle: false, + }, + }, + }, + }, + arg: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: testGetEnabledResourceQOSPolicies(), + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSLSR), + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: *sloconfig.DefaultCPUQOS(extension.QoSLS), + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(-1), + SchedIdle: pointer.Int64(1), + CoreExpeller: pointer.Bool(false), + }, + }, + }, + }, + }, + want: true, + wantErr: false, + wantField: &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLSR: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := &Plugin{ + rule: tt.field.rule, + } + got, gotErr := p.parseRuleForNodeSLO(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.wantField, p.rule) + }) + } +} + +func Test_parseForAllPods(t *testing.T) { + type fields struct { + preparePluginFn func(p *Plugin) + } + tests := []struct { + name string + fields fields + arg interface{} + want bool + wantErr bool + }{ + { + name: "parse rule failed", + arg: nil, + want: false, + wantErr: true, + }, + { + name: "trigger callback since not synced", + arg: &struct{}{}, + want: true, + wantErr: false, + }, + { + name: "not trigger callback since synced", + fields: fields{ + preparePluginFn: func(p *Plugin) { + p.allPodsSyncOnce.Do(func() {}) + }, + }, + arg: &struct{}{}, + want: false, + wantErr: false, + }, + } + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + p := newPlugin() + if tt.fields.preparePluginFn != nil { + tt.fields.preparePluginFn(p) + } + got, gotErr := p.parseForAllPods(tt.arg) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + }) + } +} + +func Test_ruleUpdateCb(t *testing.T) { + type fields struct { + prepareFn func(helper *sysutil.FileTestUtil) + plugin *Plugin + preparePluginFn func(p *Plugin) + cse sysutil.CoreSchedExtendedInterface + } + type wantFields struct { + rule *Rule + sysSupported *bool + initialized bool + cookieToPIDs map[uint64][]uint32 + groupToCookie map[string]uint64 + parentDirToCPUIdle map[string]int64 + } + tests := []struct { + name string + fields fields + arg *statesinformer.CallbackTarget + wantErr bool + wantFields wantFields + }{ + { + name: "target invalid", + fields: fields{ + plugin: newPlugin(), + }, + arg: nil, + wantErr: true, + wantFields: wantFields{ + rule: newRule(), + sysSupported: nil, + initialized: false, + }, + }, + { + name: "rule not inited", + fields: fields{ + plugin: newPlugin(), + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{}, + }, + wantErr: false, + wantFields: wantFields{ + rule: newRule(), + sysSupported: nil, + initialized: false, + }, + }, + { + name: "system does not support core sched", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C`) + }, + plugin: newPlugin(), + preparePluginFn: func(p *Plugin) { + p.rule = testGetEnabledRule() + }, + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{}, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(false), + initialized: false, + }, + }, + { + name: "no cookie has been synced", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + cpuIdle, err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + guaranteedQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + burstableQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBurstable) + besteffortQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBestEffort) + helper.WriteCgroupFileContents(guaranteedQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(burstableQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(besteffortQOSDir, cpuIdle, "0") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.executor = resourceexecutor.NewTestResourceExecutor() + p.initialized.Store(false) + }, + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{}, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: false, + parentDirToCPUIdle: map[string]int64{ + util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBurstable): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBestEffort): 0, + }, + }, + }, + { + name: "sync cookie correctly", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + cpuset, err := sysutil.GetCgroupResource(sysutil.CPUSetCPUSName) + assert.NoError(t, err) + cpuIdle, 
err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + guaranteedQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + burstableQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBurstable) + besteffortQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBestEffort) + helper.WriteCgroupFileContents(guaranteedQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(burstableQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(besteffortQOSDir, cpuIdle, "0") + sandboxContainerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + containerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.executor = resourceexecutor.NewTestResourceExecutor() + p.initialized.Store(false) + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + 
Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: true, + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + parentDirToCPUIdle: map[string]int64{ + util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBurstable): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBestEffort): 0, + "kubepods.slice/kubepods-podxxxxxx.slice": 0, + }, + }, + }, + { + name: "sync cookie correctly with CPU idle enabled", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + cpuset, err := sysutil.GetCgroupResource(sysutil.CPUSetCPUSName) + assert.NoError(t, err) + cpuIdle, err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + guaranteedQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + burstableQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBurstable) + besteffortQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBestEffort) + helper.WriteCgroupFileContents(guaranteedQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(burstableQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(besteffortQOSDir, cpuIdle, "0") + sandboxContainerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + containerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.rule = testGetAllEnabledRule() + p.executor = resourceexecutor.NewTestResourceExecutor() + p.initialized.Store(false) + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, 
map[uint32]bool{ + 12346: true, + }), + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetAllEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: true, + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + parentDirToCPUIdle: map[string]int64{ + util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBurstable): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBestEffort): 1, + "kubepods.slice/kubepods-podxxxxxx.slice": 0, + }, + }, + }, + { + name: "sync cookie correctly excluding SYSTEM pods", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + cpuset, err := sysutil.GetCgroupResource(sysutil.CPUSetCPUSName) + assert.NoError(t, err) + cpuIdle, err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + guaranteedQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + burstableQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBurstable) + besteffortQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBestEffort) + helper.WriteCgroupFileContents(guaranteedQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(burstableQOSDir, cpuIdle, "0") + 
helper.WriteCgroupFileContents(besteffortQOSDir, cpuIdle, "0") + sandboxContainerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + containerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + sandboxContainerCgroupDir1 := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podssssss.slice", "containerd://eeeeee") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcs, "100\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcsV2, "100\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podssssss.slice", cpuIdle, "0") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.executor = resourceexecutor.NewTestResourceExecutor() + p.initialized.Store(false) + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + 
slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podssssss.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-s", + UID: "ssssss", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSSystem), + slov1alpha1.LabelCoreSchedGroupID: "group-sss", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-s", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("0"), + corev1.ResourceMemory: resource.MustParse("0"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSBurstable, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container-s", + ContainerID: "containerd://tttttt", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + }, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: true, + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + parentDirToCPUIdle: map[string]int64{ + util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBurstable): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBestEffort): 0, + "kubepods.slice/kubepods-podxxxxxx.slice": 0, + "kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podssssss.slice": 0, + }, + }, + }, + { + name: "sync cookie correctly with expeller and non-expeller groups", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + cpuset, err := sysutil.GetCgroupResource(sysutil.CPUSetCPUSName) + assert.NoError(t, err) + cpuIdle, err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + guaranteedQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + burstableQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBurstable) + besteffortQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBestEffort) + helper.WriteCgroupFileContents(guaranteedQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(burstableQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(besteffortQOSDir, cpuIdle, "0") + sandboxContainerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, 
sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + containerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + sandboxContainerCgroupDir1 := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podssssss.slice", "containerd://eeeeee") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcs, "100\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcsV2, "100\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podssssss.slice", cpuIdle, "0") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.executor = resourceexecutor.NewTestResourceExecutor() + p.initialized.Store(false) + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 100: 2000, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + }, map[uint32]uint32{ + 1: 1, + 100: 100, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + 
corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podssssss.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-s", + UID: "ssssss", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSBE), + slov1alpha1.LabelCoreSchedGroupID: "group-sss", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container-s", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + extension.BatchCPU: resource.MustParse("0"), + extension.BatchMemory: resource.MustParse("0"), + }, + Limits: corev1.ResourceList{ + extension.BatchCPU: resource.MustParse("2000"), + extension.BatchMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSBestEffort, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container-s", + ContainerID: "containerd://tttttt", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + }, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: true, + cookieToPIDs: map[uint64][]uint32{ + 2000: { + 100, + }, + 1000000: { + 12340, + 12344, + 12345, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + "group-sss": 2000, + }, + parentDirToCPUIdle: map[string]int64{ + util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBurstable): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBestEffort): 0, + "kubepods.slice/kubepods-podxxxxxx.slice": 0, + "kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podssssss.slice": 0, + }, + }, + }, + { + name: "sync cookie correctly for multiple containers", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + featuresPath := sysutil.SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + cpuset, err := sysutil.GetCgroupResource(sysutil.CPUSetCPUSName) + assert.NoError(t, err) + cpuIdle, err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + guaranteedQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + burstableQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBurstable) + besteffortQOSDir := util.GetPodQoSRelativePath(corev1.PodQOSBestEffort) + helper.WriteCgroupFileContents(guaranteedQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(burstableQOSDir, cpuIdle, "0") + helper.WriteCgroupFileContents(besteffortQOSDir, cpuIdle, "0") + sandboxContainerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://aaaaaa") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcs, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, sysutil.CPUProcsV2, "12340\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir, cpuset, "0-127") + 
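+ // note: the fake cgroup.procs contents written for the sandbox and containers decide which PIDs the plugin picks up and binds to the group's cookie in this case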
helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice", cpuIdle, "0") + containerCgroupDir := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://yyyyyy") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, sysutil.CPUProcsV2, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-127") + containerCgroupDir1 := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podxxxxxx.slice", "containerd://zzzzzz") + helper.WriteCgroupFileContents(containerCgroupDir1, sysutil.CPUProcs, "12350\n") + helper.WriteCgroupFileContents(containerCgroupDir1, sysutil.CPUProcsV2, "12350\n") + helper.WriteCgroupFileContents(containerCgroupDir1, cpuset, "0-127") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podnnnnnn.slice", cpuIdle, "0") + sandboxContainerCgroupDir1 := testGetContainerCgroupParentDir(t, "kubepods.slice/kubepods-podnnnnnn.slice", "containerd://mmmmmm") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcs, "15000\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, sysutil.CPUProcsV2, "15000\n") + helper.WriteCgroupFileContents(sandboxContainerCgroupDir1, cpuset, "9-12,73-76") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + p.executor = resourceexecutor.NewTestResourceExecutor() + p.initialized.Store(false) + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx-expeller", newCookieCacheEntry(1000000, 12344, 12345, 12346)) + p.groupCache.SetDefault("xxxxxx/containerd://yyyyyy", "group-xxx-expeller") + // test-pod-1 missing cookie cache + p.groupCache.SetDefault("nnnnnn/containerd://mmmmmm", "group-yyy-expeller") + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 5000: 1000000, + 12340: 1000000, + 12344: 1000000, + 12345: 1000000, + 12346: 1000000, + 12350: 1100000, + 15000: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 5000: 5000, + 12340: 12340, + 12344: 12344, + 12345: 12344, + 12346: 12346, + 12350: 12350, + 15000: 15000, + }, map[uint32]bool{ + 12346: true, + }), + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + { + Name: "test-container-1", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: 
corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + { + Name: "test-container-1", + ContainerID: "containerd://zzzzzz", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + { + CgroupDir: "kubepods.slice/kubepods-podnnnnnn.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + UID: "nnnnnn", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLSR), + slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://mmmmmm", + }, + }, + }, + }, + }, + }, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: true, + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 12340, + 12344, + 12345, + 12346, + 12350, + }, + 2000000: {}, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + parentDirToCPUIdle: map[string]int64{ + util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBurstable): 0, + util.GetPodQoSRelativePath(corev1.PodQOSBestEffort): 0, + "kubepods.slice/kubepods-podxxxxxx.slice": 0, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := sysutil.NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + p := tt.fields.plugin + if tt.fields.cse != nil { + p.cse = tt.fields.cse + } + if tt.fields.preparePluginFn != nil { + tt.fields.preparePluginFn(p) + } + if p.executor != nil { + stopCh := make(chan struct{}) + defer close(stopCh) + p.executor.Run(stopCh) + } + + gotErr := p.ruleUpdateCb(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.wantFields.rule, p.rule) + assert.Equal(t, tt.wantFields.sysSupported, p.sysSupported) + assert.Equal(t, tt.wantFields.initialized, p.initialized.Load()) + for groupID, cookieID := range tt.wantFields.groupToCookie { + if cookieID <= 0 { + _, ok := p.cookieCache.Get(groupID) + assert.False(t, ok, groupID) + continue + } + + entryIf, ok := p.cookieCache.Get(groupID) + assert.True(t, ok) + entry, ok := entryIf.(*CookieCacheEntry) + assert.True(t, ok) + assert.Equal(t, cookieID, entry.GetCookieID(), groupID) + assert.Equal(t, len(tt.wantFields.cookieToPIDs[cookieID]), len(entry.GetAllPIDs()), + "expect [%v] but got [%v]", tt.wantFields.cookieToPIDs[cookieID], entry.GetAllPIDs()) + for _, pid := range tt.wantFields.cookieToPIDs[cookieID] { + assert.True(t, entry.HasPID(pid), pid) + } + } + for parentDir, wantValue := range tt.wantFields.parentDirToCPUIdle { + cpuIdle, err := sysutil.GetCgroupResource(sysutil.CPUIdleName) + assert.NoError(t, err) + 
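+ // after the callback has run, every parent cgroup dir is expected to hold the wanted cpu.idle value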
gotValue := helper.ReadCgroupFileContentsInt(parentDir, cpuIdle) + assert.NotNil(t, gotValue) + assert.Equal(t, wantValue, *gotValue) + } + }) + } +} + +func testGetDisabledRuleParam() Param { + return Param{} +} + +func testGetEnabledRule() *Rule { + // use default CPUQOS + return &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLSR: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: false, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: false, + }, + }, + } +} + +func testGetAllEnabledRule() *Rule { + // use default CPUQOS and enable CPU Idle + return &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLSR: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSLS: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + extension.QoSBE: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBurstable: { + IsPodEnabled: true, + IsExpeller: true, + IsCPUIdle: false, + }, + corev1.PodQOSBestEffort: { + IsPodEnabled: true, + IsExpeller: false, + IsCPUIdle: true, + }, + }, + } +} + +func testGetDisabledRule() *Rule { + return &Rule{ + podQOSParams: map[extension.QoSClass]Param{ + extension.QoSLSE: testGetDisabledRuleParam(), + extension.QoSLSR: testGetDisabledRuleParam(), + extension.QoSLS: testGetDisabledRuleParam(), + extension.QoSBE: testGetDisabledRuleParam(), + }, + kubeQOSPodParams: map[corev1.PodQOSClass]Param{ + corev1.PodQOSGuaranteed: testGetDisabledRuleParam(), + corev1.PodQOSBurstable: testGetDisabledRuleParam(), + corev1.PodQOSBestEffort: testGetDisabledRuleParam(), + }, + } +} + +func testGetEnabledResourceQOSPolicies() *slov1alpha1.ResourceQOSPolicies { + cpuPolicy := slov1alpha1.CPUQOSPolicyCoreSched + return &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &cpuPolicy, + } +} + +func testGetContainerCgroupParentDir(t *testing.T, podParentDir string, containerID string) string { + dir, err := util.GetContainerCgroupParentDirByID(podParentDir, containerID) + assert.NoError(t, err) + return dir +} diff --git a/pkg/koordlet/runtimehooks/hooks/cpuset/cpuset_test.go b/pkg/koordlet/runtimehooks/hooks/cpuset/cpuset_test.go index 02f4c2e75..e0873d30e 100644 --- a/pkg/koordlet/runtimehooks/hooks/cpuset/cpuset_test.go +++ b/pkg/koordlet/runtimehooks/hooks/cpuset/cpuset_test.go @@ -494,7 +494,7 @@ func TestUnsetPodCPUQuota(t *testing.T) { if podCtx == nil { return } - e := resourceexecutor.NewResourceUpdateExecutor() + e := resourceexecutor.NewTestResourceExecutor() stop := make(chan struct{}) defer func() { close(stop) diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go index 4bd00fded..94fda677e 100644 --- 
a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go @@ -22,7 +22,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" diff --git a/pkg/koordlet/runtimehooks/hooks/hooks.go b/pkg/koordlet/runtimehooks/hooks/hooks.go index f274677bb..ef7613d07 100644 --- a/pkg/koordlet/runtimehooks/hooks/hooks.go +++ b/pkg/koordlet/runtimehooks/hooks/hooks.go @@ -34,6 +34,7 @@ type Hook struct { } type Options struct { + Reader resourceexecutor.CgroupReader Executor resourceexecutor.ResourceUpdateExecutor } diff --git a/pkg/koordlet/runtimehooks/protocol/container_context.go b/pkg/koordlet/runtimehooks/protocol/container_context.go index 5ef45f8b1..4a9789485 100644 --- a/pkg/koordlet/runtimehooks/protocol/container_context.go +++ b/pkg/koordlet/runtimehooks/protocol/container_context.go @@ -129,11 +129,11 @@ func (c *ContainerRequest) FromReconciler(podMeta *statesinformer.PodMeta, conta if sandbox { var err error if c.ContainerMeta.ID, err = koordletutil.GetPodSandboxContainerID(podMeta.Pod); err != nil { - klog.V(4).Infof("no container id for pod %v, container may not start, %v", - util.GetPodKey(podMeta.Pod), err) + klog.V(4).Infof("failed to get sandbox container ID for pod %s, err: %s", + podMeta.Key(), err) return } else if c.ContainerMeta.ID == "" { - klog.V(4).Infof("container status is empty for pod %v, skip") + klog.V(4).Infof("container ID is empty for pod %s, pod may not start, skip", podMeta.Key()) return } } else { diff --git a/pkg/koordlet/runtimehooks/protocol/kubeqos_context.go b/pkg/koordlet/runtimehooks/protocol/kubeqos_context.go index e31e570be..b93156e02 100644 --- a/pkg/koordlet/runtimehooks/protocol/kubeqos_context.go +++ b/pkg/koordlet/runtimehooks/protocol/kubeqos_context.go @@ -91,4 +91,17 @@ func (k *KubeQOSContext) injectForExt() { *k.Response.Resources.CPUBvt, k.Request.CgroupParent) } } + if k.Response.Resources.CPUIdle != nil { + eventHelper := audit.V(3).Group(string(k.Request.KubeQOSClass)).Reason("runtime-hooks").Message( + "set kubeqos idle to %v", *k.Response.Resources.CPUIdle) + updater, err := injectCPUIdle(k.Request.CgroupParent, *k.Response.Resources.CPUIdle, eventHelper, k.executor) + if err != nil { + klog.Infof("set kubeqos %v idle %v on cgroup parent %v failed, error %v", k.Request.KubeQOSClass, + *k.Response.Resources.CPUIdle, k.Request.CgroupParent, err) + } else { + k.updaters = append(k.updaters, updater) + klog.V(5).Infof("set kubeqos %v idle %v on cgroup parent %v", k.Request.KubeQOSClass, + *k.Response.Resources.CPUIdle, k.Request.CgroupParent) + } + } } diff --git a/pkg/koordlet/runtimehooks/protocol/pod_context.go b/pkg/koordlet/runtimehooks/protocol/pod_context.go index b6883957d..f81a160d4 100644 --- a/pkg/koordlet/runtimehooks/protocol/pod_context.go +++ b/pkg/koordlet/runtimehooks/protocol/pod_context.go @@ -222,6 +222,20 @@ func (p *PodContext) injectForExt() { p.Request.PodMeta.Name, *p.Response.Resources.CPUBvt, p.Request.CgroupParent) } } + if p.Response.Resources.CPUIdle != nil { + eventHelper := audit.V(3).Pod(p.Request.PodMeta.Namespace, p.Request.PodMeta.Name).Reason("runtime-hooks").Message( + "set pod idle to %v", *p.Response.Resources.CPUIdle) + updater, err := injectCPUIdle(p.Request.CgroupParent, *p.Response.Resources.CPUIdle, eventHelper, p.executor) + if err != nil { + klog.Infof("set pod %v/%v idle %v on cgroup parent %v failed, error %v", 
p.Request.PodMeta.Namespace, + p.Request.PodMeta.Name, *p.Response.Resources.CPUIdle, p.Request.CgroupParent, err) + } else { + p.updaters = append(p.updaters, updater) + klog.V(5).Infof("set pod %v/%v idle %v on cgroup parent %v", p.Request.PodMeta.Namespace, + p.Request.PodMeta.Name, *p.Response.Resources.CPUIdle, p.Request.CgroupParent) + } + } + // some of pod-level cgroups are manually updated since pod-stage hooks do not support it; // kubelet may set the cgroups when pod is created or restarted, so we need to update the cgroups repeatedly if p.Response.Resources.CPUShares != nil { diff --git a/pkg/koordlet/runtimehooks/protocol/protocol.go b/pkg/koordlet/runtimehooks/protocol/protocol.go index c6859acc2..0688e2bfe 100644 --- a/pkg/koordlet/runtimehooks/protocol/protocol.go +++ b/pkg/koordlet/runtimehooks/protocol/protocol.go @@ -79,7 +79,8 @@ type Resources struct { MemoryLimit *int64 // extended resources - CPUBvt *int64 + CPUBvt *int64 + CPUIdle *int64 } func (r *Resources) IsOriginResSet() bool { @@ -166,3 +167,12 @@ func injectCPUBvt(cgroupParent string, bvtValue int64, a *audit.EventHelper, e r } return updater, nil } + +func injectCPUIdle(cgroupParent string, idleValue int64, a *audit.EventHelper, e resourceexecutor.ResourceUpdateExecutor) (resourceexecutor.ResourceUpdater, error) { + idleValueStr := strconv.FormatInt(idleValue, 10) + updater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUIdleName, cgroupParent, idleValueStr, a) + if err != nil { + return nil, err + } + return updater, nil +} diff --git a/pkg/koordlet/runtimehooks/reconciler/reconciler.go b/pkg/koordlet/runtimehooks/reconciler/reconciler.go index cb7f14d03..cef2f8656 100644 --- a/pkg/koordlet/runtimehooks/reconciler/reconciler.go +++ b/pkg/koordlet/runtimehooks/reconciler/reconciler.go @@ -295,7 +295,8 @@ func doKubeQOSCgroup(e resourceexecutor.ResourceUpdateExecutor) { continue } if err := reconcileFn(kubeQOSCtx); err != nil { - klog.Warningf("calling reconcile function %v failed, error %v", r.description, err) + klog.Warningf("calling reconcile function %v for kube qos %v failed, error %v", + r.description, kubeQOS, err) } else { kubeQOSCtx.ReconcilerDone(e) klog.V(5).Infof("calling reconcile function %v for kube qos %v finish", @@ -317,17 +318,18 @@ func (c *reconciler) reconcilePodCgroup(stopCh <-chan struct{}) { reconcileFn, ok := r.fn[r.filter.Filter(podMeta)] if !ok { klog.V(5).Infof("calling reconcile function %v aborted for pod %v, condition %s not registered", - r.description, util.GetPodKey(podMeta.Pod), r.filter.Filter(podMeta)) + r.description, podMeta.Key(), r.filter.Filter(podMeta)) continue } podCtx := protocol.HooksProtocolBuilder.Pod(podMeta) if err := reconcileFn(podCtx); err != nil { - klog.Warningf("calling reconcile function %v failed, error %v", r.description, err) + klog.Warningf("calling reconcile function %v for pod %v failed, error %v", + r.description, podMeta.Key(), err) } else { podCtx.ReconcilerDone(c.executor) klog.V(5).Infof("calling reconcile function %v for pod %v finished", - r.description, util.GetPodKey(podMeta.Pod)) + r.description, podMeta.Key()) } } @@ -335,16 +337,17 @@ func (c *reconciler) reconcilePodCgroup(stopCh <-chan struct{}) { reconcileFn, ok := r.fn[r.filter.Filter(podMeta)] if !ok { klog.V(5).Infof("calling reconcile function %v aborted for pod %v, condition %s not registered", - r.description, util.GetPodKey(podMeta.Pod), r.filter.Filter(podMeta)) + r.description, podMeta.Key(), r.filter.Filter(podMeta)) continue } sandboxContainerCtx := 
protocol.HooksProtocolBuilder.Sandbox(podMeta) if err := reconcileFn(sandboxContainerCtx); err != nil { - klog.Warningf("calling reconcile function %v failed for sandbox, error %v", r.description, err) + klog.Warningf("calling reconcile function %v failed for sandbox %v, error %v", + r.description, podMeta.Key(), err) } else { sandboxContainerCtx.ReconcilerDone(c.executor) klog.V(5).Infof("calling reconcile function %v for pod sandbox %v finished", - r.description, util.GetPodKey(podMeta.Pod)) + r.description, podMeta.Key()) } } @@ -352,18 +355,19 @@ func (c *reconciler) reconcilePodCgroup(stopCh <-chan struct{}) { for _, r := range globalCgroupReconcilers.containerLevel { reconcileFn, ok := r.fn[r.filter.Filter(podMeta)] if !ok { - klog.V(5).Infof("calling reconcile function %v aborted for pod %v, condition %s not registered", - r.description, util.GetPodKey(podMeta.Pod), r.filter.Filter(podMeta)) + klog.V(5).Infof("calling reconcile function %v aborted for container %v/%v, condition %s not registered", + r.description, podMeta.Key(), containerStat.Name, r.filter.Filter(podMeta)) continue } containerCtx := protocol.HooksProtocolBuilder.Container(podMeta, containerStat.Name) if err := reconcileFn(containerCtx); err != nil { - klog.Warningf("calling reconcile function %v failed, error %v", r.description, err) + klog.Warningf("calling reconcile function %v for container %v/%v failed, error %v", + r.description, podMeta.Key(), containerStat.Name, err) } else { containerCtx.ReconcilerDone(c.executor) klog.V(5).Infof("calling reconcile function %v for container %v/%v finish", - r.description, util.GetPodKey(podMeta.Pod), containerStat.Name) + r.description, podMeta.Key(), containerStat.Name) } } } diff --git a/pkg/koordlet/runtimehooks/rule/rule.go b/pkg/koordlet/runtimehooks/rule/rule.go index ca7f83331..de680010d 100644 --- a/pkg/koordlet/runtimehooks/rule/rule.go +++ b/pkg/koordlet/runtimehooks/rule/rule.go @@ -27,6 +27,10 @@ import ( "github.com/koordinator-sh/koordinator/pkg/util" ) +func init() { + globalHookRules = map[string]*Rule{} +} + type Rule struct { name string description string @@ -57,6 +61,7 @@ func Register(name, description string, injectOpts ...InjectOption) *Rule { } func (r *Rule) runUpdateCallbacks(target *statesinformer.CallbackTarget) { + klog.V(6).Infof("run update callbacks for rules, target %s", target.String()) for _, callbackFn := range r.callbacks { if err := callbackFn(target); err != nil { cbName := runtime.FuncForPC(reflect.ValueOf(callbackFn).Pointer()).Name() @@ -102,7 +107,3 @@ func UpdateRules(ruleType statesinformer.RegisterType, ruleObj interface{}, targ } } } - -func init() { - globalHookRules = map[string]*Rule{} -} diff --git a/pkg/koordlet/runtimehooks/runtimehooks.go b/pkg/koordlet/runtimehooks/runtimehooks.go index 3c24df43e..49e4eb5b7 100644 --- a/pkg/koordlet/runtimehooks/runtimehooks.go +++ b/pkg/koordlet/runtimehooks/runtimehooks.go @@ -46,6 +46,7 @@ type runtimeHook struct { nriServer *nri.NriServer reconciler reconciler.Reconciler hostAppReconciler reconciler.Reconciler + reader resourceexecutor.CgroupReader executor resourceexecutor.ResourceUpdateExecutor } @@ -87,6 +88,7 @@ func NewRuntimeHook(si statesinformer.StatesInformer, cfg *Config) (RuntimeHook, if err != nil { return nil, err } + cr := resourceexecutor.NewCgroupReader() e := resourceexecutor.NewResourceUpdateExecutor() newServerOptions := proxyserver.Options{ Network: cfg.RuntimeHooksNetwork, @@ -123,6 +125,7 @@ func NewRuntimeHook(si statesinformer.StatesInformer, cfg *Config) 
(RuntimeHook, } newPluginOptions := hooks.Options{ + Reader: cr, Executor: e, } @@ -135,6 +138,7 @@ func NewRuntimeHook(si statesinformer.StatesInformer, cfg *Config) (RuntimeHook, nriServer: nriServer, reconciler: reconciler.NewReconciler(newReconcilerCtx), hostAppReconciler: reconciler.NewHostAppReconciler(newReconcilerCtx), + reader: cr, executor: e, } registerPlugins(newPluginOptions) @@ -147,6 +151,8 @@ func NewRuntimeHook(si statesinformer.StatesInformer, cfg *Config) (RuntimeHook, si.RegisterCallbacks(statesinformer.RegisterTypeNodeMetadata, "runtime-hooks-rule-node-metadata", "Update hooks rule if Node metadata update", rule.UpdateRules) + si.RegisterCallbacks(statesinformer.RegisterTypeAllPods, "runtime-hooks-rule-all-pods", + "Update hooks rule of all Pods refresh", rule.UpdateRules) if err := s.Setup(); err != nil { return nil, fmt.Errorf("failed to setup runtime hook server, error %v", err) } diff --git a/pkg/koordlet/statesinformer/api.go b/pkg/koordlet/statesinformer/api.go index 2acfc926f..656b28179 100644 --- a/pkg/koordlet/statesinformer/api.go +++ b/pkg/koordlet/statesinformer/api.go @@ -17,6 +17,8 @@ limitations under the License. package statesinformer import ( + "fmt" + topov1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1" corev1 "k8s.io/api/core/v1" @@ -80,6 +82,13 @@ type CallbackTarget struct { HostApplications []slov1alpha1.HostApplicationSpec } +func (t *CallbackTarget) String() string { + if t == nil { + return "target: nil" + } + return fmt.Sprintf("target: pods num %v, host apps num %v", len(t.Pods), len(t.HostApplications)) +} + type UpdateCbFn func(t RegisterType, obj interface{}, target *CallbackTarget) type StatesInformer interface { diff --git a/pkg/koordlet/statesinformer/impl/callback_runner.go b/pkg/koordlet/statesinformer/impl/callback_runner.go index 362a01d15..7567002bc 100644 --- a/pkg/koordlet/statesinformer/impl/callback_runner.go +++ b/pkg/koordlet/statesinformer/impl/callback_runner.go @@ -102,7 +102,8 @@ func (s *callbackRunner) runCallbacks(objType statesinformer.RegisterType, obj i callbackTarget.HostApplications = nodeSLO.Spec.HostApplications } for _, c := range callbacks { - klog.V(5).Infof("start running callback function %v for type %v", c.name, objType.String()) + klog.V(5).Infof("start running callback function %v for type %v, pod num %v, host app num %v", + c.name, objType.String(), len(callbackTarget.Pods), len(callbackTarget.HostApplications)) c.fn(objType, obj, callbackTarget) } } diff --git a/pkg/koordlet/statesinformer/impl/states_informer.go b/pkg/koordlet/statesinformer/impl/states_informer.go index 287e485e0..0de563550 100644 --- a/pkg/koordlet/statesinformer/impl/states_informer.go +++ b/pkg/koordlet/statesinformer/impl/states_informer.go @@ -149,7 +149,6 @@ func (s *statesInformer) Run(stopCh <-chan struct{}) error { klog.V(2).Infof("starting callback runner") s.states.callbackRunner.Setup(s) - go s.states.callbackRunner.Start(stopCh) klog.V(2).Infof("starting informer plugins") s.setupPlugins() @@ -170,6 +169,11 @@ func (s *statesInformer) Run(stopCh <-chan struct{}) error { } } + // start callback runner after informers synced + // since some callbacks needs the integrated input to execute, e.g. 
valid pods list + // the initial callback events will not be missing since the callback channels are buffered + go s.states.callbackRunner.Start(stopCh) + klog.Infof("start states informer successfully") s.started.Store(true) <-stopCh diff --git a/pkg/koordlet/statesinformer/impl/states_pods.go b/pkg/koordlet/statesinformer/impl/states_pods.go index ebf6d4d0f..d79429ec1 100644 --- a/pkg/koordlet/statesinformer/impl/states_pods.go +++ b/pkg/koordlet/statesinformer/impl/states_pods.go @@ -152,10 +152,11 @@ func (s *podsInformer) syncPods() error { newPodMap := make(map[string]*statesinformer.PodMeta, len(podList.Items)) // reset pod container metrics resetPodMetrics() - for _, pod := range podList.Items { + for i := range podList.Items { + pod := &podList.Items[i] podMeta := &statesinformer.PodMeta{ - Pod: pod.DeepCopy(), - CgroupDir: genPodCgroupParentDir(&pod), + Pod: pod, // no need to deep-copy from unmarshalled + CgroupDir: genPodCgroupParentDir(pod), } newPodMap[string(pod.UID)] = podMeta // record pod container metrics diff --git a/pkg/koordlet/util/cold_page_test.go b/pkg/koordlet/util/cold_page_test.go index ea44b894c..97140659d 100644 --- a/pkg/koordlet/util/cold_page_test.go +++ b/pkg/koordlet/util/cold_page_test.go @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + package util import ( diff --git a/pkg/koordlet/util/container.go b/pkg/koordlet/util/container.go index a6e67a996..2d1eab46f 100644 --- a/pkg/koordlet/util/container.go +++ b/pkg/koordlet/util/container.go @@ -20,8 +20,6 @@ import ( "fmt" "os" "path/filepath" - "strconv" - "strings" corev1 "k8s.io/api/core/v1" @@ -72,6 +70,11 @@ func ParseContainerID(basename string) (string, error) { return system.CgroupPathFormatter.ContainerIDParser(basename) } +func IsValidContainerCgroupDir(containerParentDir string) bool { + containerID, err := system.CgroupPathFormatter.ContainerIDParser(filepath.Base(containerParentDir)) + return err == nil && len(containerID) >= 0 +} + func GetPIDsInContainer(podParentDir string, c *corev1.ContainerStatus) ([]uint32, error) { cgroupPath, err := GetContainerCgroupCPUProcsPath(podParentDir, c) if err != nil { @@ -81,15 +84,6 @@ func GetPIDsInContainer(podParentDir string, c *corev1.ContainerStatus) ([]uint3 if err != nil { return nil, err } - pidStrs := strings.Fields(strings.TrimSpace(string(rawContent))) - pids := make([]uint32, len(pidStrs)) - for i := 0; i < len(pids); i++ { - p, err := strconv.ParseUint(pidStrs[i], 10, 32) - if err != nil { - return nil, err - } - pids[i] = uint32(p) - } - return pids, nil + return system.ParseCgroupProcs(string(rawContent)) } diff --git a/pkg/koordlet/util/system/cgroup.go b/pkg/koordlet/util/system/cgroup.go index 92df2e40b..cc3468949 100644 --- a/pkg/koordlet/util/system/cgroup.go +++ b/pkg/koordlet/util/system/cgroup.go @@ -200,6 +200,22 @@ func ParseMemoryNumaStat(content string) ([]NumaMemoryPages, error) { return stat, nil } +// ParseCgroupProcs parses the content in cgroup.procs. +// pattern: `7742\n10971\n11049\n11051...` +// TODO: refactor with readCgroupAndParseInt32Slice via Generics. 
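+// Example (illustrative values): parsing "7742\n10971\n11049\n" yields []uint32{7742, 10971, 11049};
+// any non-numeric field aborts the parse and returns a wrapped error.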
+func ParseCgroupProcs(content string) ([]uint32, error) { + pidStrs := strings.Fields(strings.TrimSpace(content)) + pids := make([]uint32, len(pidStrs)) + for i := 0; i < len(pidStrs); i++ { + p, err := strconv.ParseUint(pidStrs[i], 10, 32) + if err != nil { + return nil, fmt.Errorf("failed to parse row %s into pid, err: %w", pidStrs[i], err) + } + pids[i] = uint32(p) + } + return pids, nil +} + func CalcCPUThrottledRatio(curPoint, prePoint *CPUStatRaw) float64 { deltaPeriod := curPoint.NrPeriods - prePoint.NrPeriods deltaThrottled := curPoint.NrThrottled - prePoint.NrThrottled diff --git a/pkg/koordlet/util/system/cgroup_resource.go b/pkg/koordlet/util/system/cgroup_resource.go index 2426284d8..5ad67f18a 100644 --- a/pkg/koordlet/util/system/cgroup_resource.go +++ b/pkg/koordlet/util/system/cgroup_resource.go @@ -135,6 +135,7 @@ const ( CPUMaxName = "cpu.max" CPUMaxBurstName = "cpu.max.burst" CPUWeightName = "cpu.weight" + CPUIdleName = "cpu.idle" CPUSetCPUSName = "cpuset.cpus" CPUSetCPUSEffectiveName = "cpuset.cpus.effective" @@ -178,6 +179,7 @@ var ( CPUBvtWarpNsValidator = &RangeValidator{min: -1, max: 2} CPUWeightValidator = &RangeValidator{min: CPUWeightMinValue, max: CPUWeightMaxValue} CPUMaxBurstValidator = &RangeValidator{min: 0, max: math.MaxInt64} + CPUIdleValidator = &RangeValidator{min: 0, max: 1} MemoryWmarkRatioValidator = &RangeValidator{min: 0, max: 100} MemoryPriorityValidator = &RangeValidator{min: 0, max: 12} MemoryOomGroupValidator = &RangeValidator{min: 0, max: 1} @@ -204,6 +206,7 @@ var ( CPUCFSPeriod = DefaultFactory.New(CPUCFSPeriodName, CgroupCPUDir) CPUBurst = DefaultFactory.New(CPUBurstName, CgroupCPUDir).WithValidator(CPUBurstValidator).WithCheckSupported(SupportedIfFileExists) CPUBVTWarpNs = DefaultFactory.New(CPUBVTWarpNsName, CgroupCPUDir).WithValidator(CPUBvtWarpNsValidator).WithCheckSupported(SupportedIfFileExists) + CPUIdle = DefaultFactory.New(CPUIdleName, CgroupCPUDir).WithValidator(CPUIdleValidator).WithCheckSupported(SupportedIfFileExistsInKubepods).WithCheckOnce(true) CPUTasks = DefaultFactory.New(CPUTasksName, CgroupCPUDir) CPUProcs = DefaultFactory.New(CPUProcsName, CgroupCPUDir) @@ -245,6 +248,7 @@ var ( CPUBurst, CPUTasks, CPUBVTWarpNs, + CPUIdle, CPUSet, CPUAcctStat, CPUAcctUsage, @@ -282,6 +286,7 @@ var ( CPUAcctUsageV2 = DefaultFactory.NewV2(CPUAcctUsageName, CPUStatName) CPUBurstV2 = DefaultFactory.NewV2(CPUBurstName, CPUMaxBurstName).WithValidator(CPUMaxBurstValidator).WithCheckSupported(SupportedIfFileExistsInKubepods).WithCheckOnce(true) CPUBVTWarpNsV2 = DefaultFactory.NewV2(CPUBVTWarpNsName, CPUBVTWarpNsName).WithValidator(CPUBvtWarpNsValidator).WithCheckSupported(SupportedIfFileExists) + CPUIdleV2 = DefaultFactory.NewV2(CPUIdleName, CPUIdleName).WithValidator(CPUIdleValidator).WithCheckSupported(SupportedIfFileExistsInKubepods).WithCheckOnce(true) CPUAcctCPUPressureV2 = DefaultFactory.NewV2(CPUAcctCPUPressureName, CPUAcctCPUPressureName).WithCheckSupported(SupportedIfFileExistsInKubepods).WithCheckOnce(true) CPUAcctMemoryPressureV2 = DefaultFactory.NewV2(CPUAcctMemoryPressureName, CPUAcctMemoryPressureName).WithCheckSupported(SupportedIfFileExistsInKubepods).WithCheckOnce(true) @@ -314,6 +319,7 @@ var ( CPUAcctUsageV2, CPUBurstV2, CPUBVTWarpNsV2, + CPUIdleV2, CPUAcctCPUPressureV2, CPUAcctMemoryPressureV2, CPUAcctIOPressureV2, diff --git a/pkg/koordlet/util/system/common.go b/pkg/koordlet/util/system/common.go index 6aff60031..6a7f32578 100644 --- a/pkg/koordlet/util/system/common.go +++ b/pkg/koordlet/util/system/common.go @@ -20,6 
+20,7 @@ import ( "io" "os" "path" + "runtime" "strings" "syscall" @@ -109,3 +110,20 @@ func ParseKVMap(content string) map[string]string { } return m } + +// GoWithNewThread synchronously runs the function in a new goroutine bound to a new OS thread. +func GoWithNewThread(f func() interface{}) interface{} { + // Lock the thread of the caller goroutine to ensure the thread does not change outside the new goroutine. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + retCh := make(chan interface{}) + go func() { + // When the calling goroutine exits without unlocking the thread, the thread will be terminated. + // It helps the function to lock with an individual thread so not to affect the caller goroutine. + runtime.LockOSThread() + ret := f() + retCh <- ret + }() + ret := <-retCh + return ret +} diff --git a/pkg/koordlet/util/system/common_test.go b/pkg/koordlet/util/system/common_test.go index 9634d3b68..a3fea783c 100644 --- a/pkg/koordlet/util/system/common_test.go +++ b/pkg/koordlet/util/system/common_test.go @@ -19,12 +19,18 @@ package system import ( "fmt" "reflect" + "runtime" "testing" "time" "github.com/stretchr/testify/assert" ) +func DumpGoroutineInfo() string { + return fmt.Sprintf("GOMAXPROC=%v, NumCPU=%v, NumGoroutine=%v", + runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine()) +} + type TestMetric struct { Time time.Time Value int64 @@ -91,3 +97,64 @@ func TestParseKVMap(t *testing.T) { }) } } + +func TestGoWithNewThread(t *testing.T) { + t.Run("test", func(t *testing.T) { + f := func() interface{} { + t.Log("TestGoWithNewThread without error") + return (error)(nil) + } + retIf := GoWithNewThread(f) + assert.Nil(t, retIf) + + f = func() interface{} { + t.Log("TestGoWithNewThread with error") + return fmt.Errorf("got error") + } + retIf = GoWithNewThread(f) + err, ok := retIf.(error) + assert.True(t, ok) + assert.Error(t, err.(error)) + }) +} + +func BenchmarkGoWithNewThread(b *testing.B) { + tests := []struct { + name string + arg func(*FileTestUtil) error + wantErr bool + }{ + { + name: "empty func", + arg: func(*FileTestUtil) error { + return nil + }, + wantErr: false, + }, + { + name: "file write and read", + arg: func(helper *FileTestUtil) error { + content := `hello world` + helper.WriteFileContents("GoWithNewThreadWR.txt", content) + got := helper.ReadFileContents("GoWithNewThreadWR.txt") + assert.Equal(helper.t, content, got) + return nil + }, + wantErr: false, + }, + } + b.ResetTimer() + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + helper := NewFileTestUtil(b) + defer helper.Cleanup() + for i := 0; i < b.N; i++ { + got := GoWithNewThread(func() interface{} { + return tt.arg(helper) + }) + gotErr := got.(error) + assert.Equal(b, tt.wantErr, gotErr != nil, gotErr) + } + }) + } +} diff --git a/pkg/koordlet/util/system/core_sched.go b/pkg/koordlet/util/system/core_sched.go new file mode 100644 index 000000000..900b8e5d5 --- /dev/null +++ b/pkg/koordlet/util/system/core_sched.go @@ -0,0 +1,382 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "fmt" + "os" + "strings" + + "k8s.io/klog/v2" +) + +const ( + // SchedFeatureCoreSched is the feature name of the core scheduling in `/sys/kernel/debug/sched_features`. + SchedFeatureCoreSched = "CORE_SCHED" + // SchedFeatureNoCoreSched is the feature name that the core scheduling supported but disabled. + SchedFeatureNoCoreSched = "NO_CORE_SCHED" +) + +// CoreSchedScopeType defines the type of the PID type operated in core sched. +type CoreSchedScopeType uint + +const ( + // CoreSchedScopeThread means the PID type operated in core sched is a thread. + CoreSchedScopeThread CoreSchedScopeType = iota + // CoreSchedScopeThreadGroup means the PID type operated in core sched is a thread group. + CoreSchedScopeThreadGroup + // CoreSchedScopeProcessGroup means the PID type operated in core sched is a process group. + CoreSchedScopeProcessGroup +) + +// CoreSchedInterface defines the basic operations of the Linux Core Scheduling that targets on the PID-level by prctl(). +// https://docs.kernel.org/admin-guide/hw-vuln/core-scheduling.html +type CoreSchedInterface interface { + // Get gets core sched cookie of pid. + // The default cookie ID is 0. It only supports thread-level. + Get(pidType CoreSchedScopeType, pid uint32) (uint64, error) + // Create creates a new unique cookie to pid. + // The cookie ID is generated by the kernel and is unique for each group. + Create(pidType CoreSchedScopeType, pid uint32) error + // ShareTo push core sched cookie (of the current task) to pid. + // e.g. ShareTo(CoreSchedScopeThreadGroup, 10000) means set the cookie of the pid 10000 as the current thread's + // cookie of the agent. + ShareTo(pidType CoreSchedScopeType, pid uint32) error + // ShareFrom pull core sched cookie from pid (to the current task). + // It only supports thread-level. + // e.g. ShareFrom(CoreSchedScopeThreadGroup, 10000) means set the current thread's cookie of the agent as the + // cookie of the pid 10000. + ShareFrom(pidType CoreSchedScopeType, pid uint32) error +} + +// CoreSchedExtendedInterface defines the operations of the Linux Core Scheduling including extended OPs. +// It implements simplified operations Clear and Assign by combining ShareTo and ShareFrom with an empty thread relay. +type CoreSchedExtendedInterface interface { + CoreSchedInterface + // Clear clears core sched cookie to the default cookie 0, and returns the list of failed pids. + // It can be implemented with ShareTo(pidType, pid) where the current task's cookie is 0. + // e.g. Clear(CoreSchedScopeThreadGroup, 10000, 20000) means reset the cookies of pid 10000 and pid 20000 to 0. + Clear(pidType CoreSchedScopeType, pids ...uint32) ([]uint32, error) + // Assign assigns core sched cookie of the pidFrom onto pidsTo, and returns the list of failed pidTos. + // pidFrom only supports thread-level, while the pidsTo can be in other levels. + // It can be implemented with ShareFrom(pidTypeFrom, pidFrom) and ShareTo(pidTypeTo, pidTo) where the current task's + // cookie is locked between the two operations. + // e.g. Assign(CoreSchedScopeThread, 10000, CoreSchedScopeProcessGroup, 10001, 20000) means set the cookies of the + // process group 10001 and 20000 as the cookie of the thread 10000. 
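+	// A typical (hypothetical) agent-side flow combines Create and Assign: Create(CoreSchedScopeThread, relayTID)
+	// mints a fresh cookie on a relay thread, then Assign(CoreSchedScopeThread, relayTID, CoreSchedScopeThreadGroup, pid1, pid2, ...)
+	// spreads that cookie to the target thread groups; relayTID here is only an illustrative name.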
+ Assign(pidTypeFrom CoreSchedScopeType, pidFrom uint32, pidTypeTo CoreSchedScopeType, pidsTo ...uint32) ([]uint32, error) +} + +// FakeCoreSchedExtended implements the fake CoreSchedExtendedInterface for testing. +type FakeCoreSchedExtended struct { + PIDToCookie map[uint32]uint64 + PIDToPGID map[uint32]uint32 + PIDToTGID map[uint32]uint32 + PIDToError map[uint32]bool + CurPID uint32 + NextCookieID uint64 +} + +func NewFakeCoreSchedExtended(pidToCookie map[uint32]uint64, pidToPGID map[uint32]uint32, pidToError map[uint32]bool) CoreSchedExtendedInterface { + f := &FakeCoreSchedExtended{ + PIDToCookie: pidToCookie, + PIDToPGID: pidToPGID, + PIDToError: pidToError, + CurPID: 1, + NextCookieID: 1, + } + if f.PIDToCookie == nil { + f.PIDToCookie = map[uint32]uint64{} + } + if f.PIDToPGID == nil { + f.PIDToPGID = map[uint32]uint32{} + } + if f.PIDToTGID == nil { + f.PIDToTGID = f.PIDToPGID + } + if f.PIDToError == nil { + f.PIDToError = map[uint32]bool{} + } + for pid, pgid := range pidToPGID { + f.PIDToPGID[pid] = pgid + } + return f +} + +func (f *FakeCoreSchedExtended) SetCurPID(pid uint32) { + f.CurPID = pid +} + +func (f *FakeCoreSchedExtended) SetNextCookieID(id uint64) { + f.NextCookieID = id +} + +func (f *FakeCoreSchedExtended) Get(pidType CoreSchedScopeType, pid uint32) (uint64, error) { + if _, ok := f.PIDToError[pid]; ok { + return 0, fmt.Errorf("get cookie error") + } + if pidType != CoreSchedScopeThread { + return 0, fmt.Errorf("unsupported pid type %d", pidType) + } + if v, ok := f.PIDToCookie[pid]; ok { + return v, nil + } + return 0, nil +} + +func (f *FakeCoreSchedExtended) Create(pidType CoreSchedScopeType, pid uint32) error { + if _, ok := f.PIDToError[pid]; ok { + return fmt.Errorf("create cookie error") + } + f.PIDToCookie[pid] = f.NextCookieID + if pidType == CoreSchedScopeProcessGroup { + for cPID, pgid := range f.PIDToPGID { + if pgid == pid { + f.PIDToCookie[cPID] = f.NextCookieID + } + } + } else if pidType == CoreSchedScopeThreadGroup { + for cPID, tgid := range f.PIDToTGID { + if tgid == pid { + f.PIDToCookie[cPID] = f.NextCookieID + } + } + } + f.NextCookieID++ + return nil +} + +func (f *FakeCoreSchedExtended) ShareTo(pidType CoreSchedScopeType, pid uint32) error { + if _, ok := f.PIDToError[pid]; ok { + return fmt.Errorf("shareTo cookie error") + } + curCookieID := f.PIDToCookie[f.CurPID] + f.PIDToCookie[pid] = curCookieID + if pidType == CoreSchedScopeProcessGroup { + for cPID, pgid := range f.PIDToPGID { + if pgid == pid { + f.PIDToCookie[cPID] = curCookieID + } + } + } else if pidType == CoreSchedScopeThreadGroup { + for cPID, tgid := range f.PIDToTGID { + if tgid == pid { + f.PIDToCookie[cPID] = curCookieID + } + } + } + return nil +} + +func (f *FakeCoreSchedExtended) ShareFrom(pidType CoreSchedScopeType, pid uint32) error { + if _, ok := f.PIDToError[pid]; ok { + return fmt.Errorf("shareFrom cookie error") + } + if pidType != CoreSchedScopeThread { + return fmt.Errorf("unsupported pid type %d", pidType) + } + f.PIDToCookie[f.CurPID] = f.PIDToCookie[pid] + return nil +} + +func (f *FakeCoreSchedExtended) Clear(pidType CoreSchedScopeType, pids ...uint32) ([]uint32, error) { + var failedPIDs []uint32 + for _, pid := range pids { + if _, ok := f.PIDToError[pid]; ok { + failedPIDs = append(failedPIDs, pid) + continue + } + f.PIDToCookie[pid] = 0 + if pidType == CoreSchedScopeProcessGroup { + for cPID, pgid := range f.PIDToPGID { + if pgid == pid { + f.PIDToCookie[cPID] = 0 + } + } + } else if pidType == CoreSchedScopeThreadGroup { + for cPID, tgid := range 
f.PIDToTGID { + if tgid == pid { + f.PIDToCookie[cPID] = 0 + } + } + } + } + if len(failedPIDs) > 0 { + return failedPIDs, fmt.Errorf("clear cookie error") + } + return nil, nil +} + +func (f *FakeCoreSchedExtended) Assign(pidTypeFrom CoreSchedScopeType, pidFrom uint32, pidTypeTo CoreSchedScopeType, pidsTo ...uint32) ([]uint32, error) { + var failedPIDs []uint32 + if pidTypeFrom != CoreSchedScopeThread { + return nil, fmt.Errorf("unsupported pid type %d", pidTypeFrom) + } + if _, ok := f.PIDToError[pidFrom]; ok { + return nil, fmt.Errorf("assign cookie for pidFrom error") + } + cookieID := f.PIDToCookie[pidFrom] + for _, pidTo := range pidsTo { + if _, ok := f.PIDToError[pidTo]; ok { + failedPIDs = append(failedPIDs, pidTo) + continue + } + if pidTypeTo == CoreSchedScopeThreadGroup { + f.PIDToCookie[pidTo] = cookieID + continue + } + if pidTypeTo == CoreSchedScopeProcessGroup { + for cPID, pgid := range f.PIDToPGID { + if pgid != pidTo { + continue + } + if _, ok := f.PIDToError[cPID]; ok { + failedPIDs = append(failedPIDs, cPID) + continue + } + f.PIDToCookie[cPID] = cookieID + } + } else if pidTypeTo == CoreSchedScopeThreadGroup { + for cPID, tgid := range f.PIDToTGID { + if tgid != pidTo { + continue + } + if _, ok := f.PIDToError[cPID]; ok { + failedPIDs = append(failedPIDs, cPID) + continue + } + f.PIDToCookie[cPID] = cookieID + } + } + } + if len(failedPIDs) > 0 { + return failedPIDs, fmt.Errorf("assign cookie for pidsTo error") + } + return nil, nil +} + +// EnableCoreSchedIfSupported checks if the core scheduling feature is enabled in the kernel sched_features. +// If kernel supported (available in the latest Anolis OS), it tries to enable the core scheduling feature. +// The core sched's kernel feature is known set in two places, if both of them are not found, the system is considered +// unsupported for the core scheduling: +// 1. In `/proc/sys/kernel/sched_core`, the value `1` means the feature is enabled while `0` means disabled. +// 2. (Older kernel) In `/sys/kernel/debug/sched_features`, the field `CORE_SCHED` means the feature is enabled while `NO_CORE_SCHED` +// means it is disabled. +func EnableCoreSchedIfSupported() (bool, string) { + // 1. try sysctl + isSysctlSupported, err := GetSchedCore() + if err == nil && isSysctlSupported { + klog.V(6).Info("Core Sched is already enabled by sysctl") + return true, "" + } + if err == nil { // sysctl supported while value=0 + klog.V(6).Info("Core Sched is disabled by sysctl, try to enable it") + err = SetSchedCore(true) + if err == nil { + klog.Info("Core Sched is enabled by sysctl successfully") + return true, "" + } + klog.V(4).Infof("failed to enable core sched via sysctl, fallback to sched_features, err: %s", err) + } else { + klog.V(5).Infof("failed to enable core sched via sysctl since get failed, try sched_features, err: %s", err) + } + + // 2. 
try sched_features (old interface) + isSchedFeaturesSupported, msg := SchedFeatures.IsSupported("") + if !isSchedFeaturesSupported { // sched_features does not exist + klog.V(6).Infof("failed to enable core sched via sysctl or sched_features, feature unsupported, msg: %s", msg) + return false, "core sched not supported" + } + isSchedFeatureEnabled, err := IsCoreSchedFeatureEnabled() + if err == nil && isSchedFeatureEnabled { + klog.V(6).Info("Core Sched is already enabled by sched_features") + return true, "" + } + if err == nil { + klog.V(6).Info("Core Sched is disabled by sched_features, try to enable it") + isSchedFeatureEnabled, msg = SetCoreSchedFeatureEnabled() + if isSchedFeatureEnabled { + klog.Info("Core Sched is enabled by sched_features successfully") + return true, "" + } + klog.V(4).Infof("failed to enable core sched via sched_features, msg: %s", msg) + } else { + klog.V(5).Infof("failed to enable core sched via sched_features, err: %s", err) + } + + return false, "core sched not supported" +} + +func IsCoreSchedFeatureEnabled() (bool, error) { + featurePath := SchedFeatures.Path("") + content, err := os.ReadFile(featurePath) + if err != nil { + return false, fmt.Errorf("failed to read sched_features, err: %w", err) + } + + features := strings.Fields(string(content)) + for _, feature := range features { + if feature == SchedFeatureCoreSched { + klog.V(6).Infof("Core Sched is enabled by sched_features") + return true, nil + } else if feature == SchedFeatureNoCoreSched { + klog.V(6).Infof("Core Sched is disabled by sched_features") + return false, nil + } + } + + return false, fmt.Errorf("core sched not found in sched_features") +} + +// SetCoreSchedFeatureEnabled tries to enable the core scheduling feature in the kernel sched_features, and reports whether it is finally enabled. +func SetCoreSchedFeatureEnabled() (bool, string) { + featurePath := SchedFeatures.Path("") + content, err := os.ReadFile(featurePath) + if err != nil { + klog.V(5).Infof("Core Sched is unsupported by sched_features %s, read err: %s", featurePath, err) + return false, "failed to read sched_features" + } + + features := strings.Fields(string(content)) + for _, feature := range features { + if feature == SchedFeatureCoreSched { + return true, "" + } + } + + err = os.WriteFile(featurePath, []byte(fmt.Sprintf("%s\n", SchedFeatureCoreSched)), 0666) + if err != nil { + klog.V(5).Infof("Core Sched is unsupported by sched_features %s, write err: %s", featurePath, err) + return false, "failed to write sched_features" + } + + return true, "" +} + +const ( + // VirtualCoreSchedCookieName is the name of a virtual system resource for the core scheduling cookie. + VirtualCoreSchedCookieName = "core_sched_cookie" + + // DefaultCoreSchedCookieID is the default cookie of the core scheduling. + DefaultCoreSchedCookieID uint64 = 0 +) + +var ( + // VirtualCoreSchedCookie represents a virtual system resource for the core scheduling cookie. + // It is virtual since it only denotes the operations on processes' core scheduling cookies, and it is not allowed to do + // any real read or write on the provided filepath. + VirtualCoreSchedCookie = NewCommonSystemResource("", VirtualCoreSchedCookieName, GetProcRootDir) +) diff --git a/pkg/koordlet/util/system/core_sched_linux.go b/pkg/koordlet/util/system/core_sched_linux.go new file mode 100644 index 000000000..dfa586364 --- /dev/null +++ b/pkg/koordlet/util/system/core_sched_linux.go @@ -0,0 +1,176 @@ +//go:build linux +// +build linux + +/* +Copyright 2022 The Koordinator Authors.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "fmt" + "unsafe" + + "golang.org/x/sys/unix" + utilerrors "k8s.io/apimachinery/pkg/util/errors" +) + +type CoreSched struct{} + +func NewCoreSched() CoreSchedInterface { + return &CoreSched{} +} + +func NewCoreSchedExtended() CoreSchedExtendedInterface { + return &CoreSched{} +} + +func (s *CoreSched) Get(pidType CoreSchedScopeType, pid uint32) (uint64, error) { + // NOTE: pidType only support Thread type. + cookie := uint64(0) + cookiePtr := &cookie + ret, err := unix.PrctlRetInt(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_GET, uintptr(pid), uintptr(pidType), uintptr(unsafe.Pointer(cookiePtr))) + if err != nil { + return 0, fmt.Errorf("CoreSched get error, PID_TYPE=%v, PID=%v, err: %w", pidType, pid, err) + } + if ret != 0 { + return 0, fmt.Errorf("CoreSched get failed, PID_TYPE=%v, PID=%v, ret: %v", pidType, pid, ret) + } + return cookie, nil +} + +func (s *CoreSched) Create(pidType CoreSchedScopeType, pid uint32) error { + ret, err := unix.PrctlRetInt(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, uintptr(pid), uintptr(pidType), 0) + if err != nil { + return fmt.Errorf("CoreSched create error, PID_TYPE=%v, PID=%v, err: %w", pidType, pid, err) + } + if ret != 0 { + return fmt.Errorf("CoreSched create failed, PID_TYPE=%v, PID=%v, ret: %v", pidType, pid, ret) + } + return nil +} + +func (s *CoreSched) ShareTo(pidType CoreSchedScopeType, pid uint32) error { + ret, err := unix.PrctlRetInt(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_TO, uintptr(pid), uintptr(pidType), 0) + if err != nil { + return fmt.Errorf("CoreSched shareTo error, PID_TYPE=%v, PID=%v, err: %w", pidType, pid, err) + } + if ret != 0 { + return fmt.Errorf("CoreSched shareTo failed, PID_TYPE=%v, PID=%v, ret: %v", pidType, pid, ret) + } + return nil +} + +func (s *CoreSched) ShareFrom(pidType CoreSchedScopeType, pid uint32) error { + // NOTE: pidTypeFrom only support Thread type. 
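+ // PR_SCHED_CORE_SHARE_FROM pulls the core sched cookie of the task identified by pid into the calling thread, i.e. the reverse direction of PR_SCHED_CORE_SHARE_TO above.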
+ ret, err := unix.PrctlRetInt(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(pidType), 0) + if err != nil { + return fmt.Errorf("CoreSched shareFrom error, PID_TYPE=%v, PID=%v, err: %w", pidType, pid, err) + } + if ret != 0 { + return fmt.Errorf("CoreSched shareFrom failed, PID_TYPE=%v, PID=%v, ret: %v", pidType, pid, ret) + } + return nil +} + +type CoreSchedExtendedResult struct { + FailedPIDs []uint32 + Error error +} + +func (s *CoreSched) clear(pidType CoreSchedScopeType, pids ...uint32) ([]uint32, error) { + var failedPIDs []uint32 + var errs []error + for _, pid := range pids { + err := s.ShareTo(pidType, pid) + if err != nil { + failedPIDs = append(failedPIDs, pid) + errs = append(errs, err) + } + } + if len(errs) > 0 { + return failedPIDs, utilerrors.NewAggregate(errs) + } + return nil, nil +} + +func (s *CoreSched) Clear(pidType CoreSchedScopeType, pids ...uint32) ([]uint32, error) { + // keep the outside goroutine with cookie 0, then we can reset the target pid's cookie by ShareTo the cookie of + // the new goroutine + // TODO: directly use syscall when the kernel supports Clear (0x1000) + retIf := GoWithNewThread(func() interface{} { + failedPIDs, err := s.clear(pidType, pids...) + if err != nil { + return &CoreSchedExtendedResult{ + FailedPIDs: failedPIDs, + Error: err, + } + } + return nil + }) + if retIf == nil { + return nil, nil + } + ret := retIf.(*CoreSchedExtendedResult) + if ret != nil { + return ret.FailedPIDs, fmt.Errorf("CoreSched Clear failed, err: %w", ret.Error) + } + return nil, nil +} + +func (s *CoreSched) assign(pidTypeFrom CoreSchedScopeType, pidFrom uint32, pidTypeTo CoreSchedScopeType, pidsTo ...uint32) ([]uint32, error) { + err := s.ShareFrom(pidTypeFrom, pidFrom) + if err != nil { + return nil, err + } + var failedPIDs []uint32 + var errs []error + for _, pidTo := range pidsTo { + err1 := s.ShareTo(pidTypeTo, pidTo) + if err1 != nil { + failedPIDs = append(failedPIDs, pidTo) + errs = append(errs, err1) + } + } + if len(errs) > 0 { + return failedPIDs, utilerrors.NewAggregate(errs) + } + return nil, nil +} + +func (s *CoreSched) Assign(pidTypeFrom CoreSchedScopeType, pidFrom uint32, pidTypeTo CoreSchedScopeType, pidsTo ...uint32) ([]uint32, error) { + // NOTE: pidTypeFrom only supports the Thread type. + // keep the outside goroutine with cookie 0, then we can assign the pidFrom's cookie to pidTo by: + // 1. ShareFrom the pidFrom's cookie to the new goroutine + // 2. ShareTo the new goroutine's cookie to the target pidTo + retIf := GoWithNewThread(func() interface{} { + failedPIDs, err := s.assign(pidTypeFrom, pidFrom, pidTypeTo, pidsTo...) + if err != nil { + return &CoreSchedExtendedResult{ + FailedPIDs: failedPIDs, + Error: err, + } + } + return nil + }) + if retIf == nil { + return nil, nil + } + ret := retIf.(*CoreSchedExtendedResult) + if ret != nil { + return ret.FailedPIDs, fmt.Errorf("CoreSched Assign failed, err: %w", ret.Error) + } + return nil, nil +} diff --git a/pkg/koordlet/util/system/core_sched_linux_test.go b/pkg/koordlet/util/system/core_sched_linux_test.go new file mode 100644 index 000000000..03591c5e6 --- /dev/null +++ b/pkg/koordlet/util/system/core_sched_linux_test.go @@ -0,0 +1,210 @@ +//go:build linux +// +build linux + +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "fmt" + "os" + "strconv" + "sync" + "syscall" + "testing" +) + +func DumpGoThreadInfo(prefix string) string { + return fmt.Sprintf("[%s] PID=%v, parent PID=%v, TTID=%v, %s", + prefix, os.Getpid(), os.Getppid(), syscall.Gettid(), DumpGoroutineInfo()) +} + +func BenchmarkCoreSchedGet(b *testing.B) { + tests := []struct { + name string + parallelism int + }{ + { + name: "2", + parallelism: 2, + }, + { + name: "10", + parallelism: 10, + }, + { + name: "50", + parallelism: 50, + }, + { + name: "100", + parallelism: 100, + }, + { + name: "200", + parallelism: 200, + }, + { + name: "500", + parallelism: 500, + }, + } + + b.ResetTimer() + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + var wg sync.WaitGroup + for j := 0; j < tt.parallelism; j++ { + wg.Add(1) + go func(x int) { + cs := NewCoreSched() + tid := syscall.Gettid() + _, err := cs.Get(CoreSchedScopeThread, uint32(tid)) + if err != nil { + b.Logf("CORE_SCHED_SCOPE_THREAD %v get failed, err: %s\n", x, err) + } + wg.Done() + }(j) + } + wg.Wait() + } + }) + } +} + +func BenchmarkCoreSchedExtendedAssign(b *testing.B) { + tests := []struct { + name string + parallelism int + isBatch bool + }{ + { + name: "2", + parallelism: 2, + }, + { + name: "10", + parallelism: 10, + }, + { + name: "50", + parallelism: 50, + }, + { + name: "100", + parallelism: 100, + }, + { + name: "200", + parallelism: 200, + }, + { + name: "500", + parallelism: 500, + }, + { + name: "2-batch", + parallelism: 2, + isBatch: true, + }, + { + name: "10-batch", + parallelism: 10, + isBatch: true, + }, + { + name: "50-batch", + parallelism: 50, + isBatch: true, + }, + { + name: "100-batch", + parallelism: 100, + isBatch: true, + }, + { + name: "200-batch", + parallelism: 200, + isBatch: true, + }, + { + name: "500-batch", + parallelism: 500, + isBatch: true, + }, + } + + b.ResetTimer() + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + if tt.isBatch { // batch assign pids + err := GoWithNewThread(func() interface{} { + tid := syscall.Gettid() + pidsTo := make([]uint32, tt.parallelism) + for j := 0; j < tt.parallelism; j++ { + pidsTo[j] = uint32(tid) + } + + cs := &wrappedCoreSchedExtended{ + CoreSched: &CoreSched{}, + beforeFn: func() { + b.Log(DumpGoThreadInfo("before [batch]")) + }, + afterFn: func() { + b.Log(DumpGoThreadInfo("after [batch]")) + }, + } + _, err := cs.Assign(CoreSchedScopeThread, uint32(tid), CoreSchedScopeThread, pidsTo...) 
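+ // the wrapped Assign above runs in a dedicated OS thread: one SHARE_FROM from the benchmark thread, then one SHARE_TO per entry in pidsTo (all the same TID here)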
+ if err != nil { + return err + } + return nil + }) + if err != nil { + b.Logf("CORE_SCHED_SCOPE_THREAD assign batch failed, err: %s\n", err) + } + continue + } + + var wg sync.WaitGroup + for j := 0; j < tt.parallelism; j++ { + wg.Add(1) + go func(x int) { + tid := syscall.Gettid() + cs := &wrappedCoreSchedExtended{ + CoreSched: &CoreSched{}, + beforeFn: func() { + b.Log(DumpGoThreadInfo("before " + strconv.Itoa(x))) + }, + afterFn: func() { + b.Log(DumpGoThreadInfo("after " + strconv.Itoa(x))) + }, + } + _, err := cs.Assign(CoreSchedScopeThread, uint32(tid), CoreSchedScopeThread, uint32(tid)) + if err != nil { + b.Logf("CORE_SCHED_SCOPE_THREAD %v assign failed, err: %s\n", x, err) + } + wg.Done() + }(j) + } + wg.Wait() + } + }) + } +} diff --git a/pkg/koordlet/util/system/core_sched_test.go b/pkg/koordlet/util/system/core_sched_test.go new file mode 100644 index 000000000..ea47f1370 --- /dev/null +++ b/pkg/koordlet/util/system/core_sched_test.go @@ -0,0 +1,354 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +var _ CoreSchedExtendedInterface = (*wrappedCoreSchedExtended)(nil) + +type wrappedCoreSchedExtended struct { + *CoreSched + beforeFn func() + afterFn func() +} + +func (w *wrappedCoreSchedExtended) Clear(pidType CoreSchedScopeType, pid ...uint32) ([]uint32, error) { + if w.beforeFn != nil { + w.beforeFn() + } + failedPIDs, err := w.CoreSched.Clear(pidType, pid...) + if w.afterFn != nil { + w.afterFn() + } + return failedPIDs, err +} + +func (w *wrappedCoreSchedExtended) Assign(pidTypeFrom CoreSchedScopeType, pidFrom uint32, pidTypeTo CoreSchedScopeType, pidsTo ...uint32) ([]uint32, error) { + if w.beforeFn != nil { + w.beforeFn() + } + failedPIDs, err := w.CoreSched.Assign(pidTypeFrom, pidFrom, pidTypeTo, pidsTo...) 
+ if w.afterFn != nil { + w.afterFn() + } + return failedPIDs, err +} + +func TestCoreSched(t *testing.T) { + t.Run("test", func(t *testing.T) { + invalidPidType := CoreSchedScopeType(100) + cs := NewCoreSched() + got, gotErr := cs.Get(invalidPidType, 0) + assert.Error(t, gotErr) + assert.Equal(t, uint64(0), got) + gotErr = cs.Create(invalidPidType, 0) + assert.Error(t, gotErr) + + cse := NewCoreSchedExtended() + _, gotErr = cse.Clear(invalidPidType, 0) + assert.Error(t, gotErr) + _, gotErr = cse.Assign(invalidPidType, 0, invalidPidType, 0) + assert.Error(t, gotErr) + }) +} + +func TestFakeCoreSchedExtended(t *testing.T) { + t.Run("test", func(t *testing.T) { + initPIDToCookie := map[uint32]uint64{ + 1: 0, + 2: 0, + 10000: 1, + 10001: 1, + 10010: 2, + 20000: 3, + } + initPIDToPGID := map[uint32]uint32{ + 1: 1, + 2: 1, + 10000: 10000, + 10001: 10000, + 10010: 10010, + 20000: 20000, + 11000: 1, + 21000: 10000, + } + initPIDError := map[uint32]bool{ + 3: true, + 9999: true, + 10002: true, + } + + // new + cs := NewFakeCoreSchedExtended(initPIDToCookie, initPIDToPGID, initPIDError) + assert.NotNil(t, cs) + f, ok := cs.(*FakeCoreSchedExtended) + assert.True(t, ok) + assert.NotNil(t, f) + f.SetCurPID(2) + f.SetNextCookieID(20001) + + // get + got, gotErr := cs.Get(CoreSchedScopeProcessGroup, 1) + assert.Equal(t, uint64(0), got) + assert.Error(t, gotErr) + got, gotErr = cs.Get(CoreSchedScopeThread, 3) + assert.Equal(t, uint64(0), got) + assert.Error(t, gotErr) + got, gotErr = cs.Get(CoreSchedScopeThread, 20000) + assert.Equal(t, uint64(3), got) + assert.NoError(t, gotErr) + + // create + got, gotErr = cs.Get(CoreSchedScopeThread, 21000) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + gotErr = cs.Create(CoreSchedScopeProcessGroup, 10000) + assert.NoError(t, gotErr) + got, gotErr = cs.Get(CoreSchedScopeThread, 10000) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(20001), got) + got, gotErr = cs.Get(CoreSchedScopeThread, 21000) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(20001), got) + + // shareTo + gotErr = cs.ShareTo(CoreSchedScopeThread, 21000) + assert.NoError(t, gotErr) + got, gotErr = cs.Get(CoreSchedScopeThread, 21000) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + + // shareFrom + gotErr = cs.ShareFrom(CoreSchedScopeThread, 10000) + assert.NoError(t, gotErr) + got, gotErr = cs.Get(CoreSchedScopeThread, 2) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(20001), got) + gotErr = cs.ShareFrom(CoreSchedScopeThread, 1) + assert.NoError(t, gotErr) + got, gotErr = cs.Get(CoreSchedScopeThread, 2) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + + // clear + gotErrPIDs, gotErr := cs.Clear(CoreSchedScopeThread, 10010) + assert.NoError(t, gotErr) + assert.Nil(t, gotErrPIDs) + got, gotErr = cs.Get(CoreSchedScopeThread, 10010) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + + // assign + gotErrPIDs, gotErr = cs.Assign(CoreSchedScopeThread, 21000, CoreSchedScopeProcessGroup, 10000) + assert.NoError(t, gotErr) + assert.Nil(t, gotErrPIDs) + got, gotErr = cs.Get(CoreSchedScopeThread, 10000) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + got, gotErr = cs.Get(CoreSchedScopeThread, 10001) + assert.NoError(t, gotErr) + assert.Equal(t, uint64(0), got) + gotErrPIDs, gotErr = cs.Assign(CoreSchedScopeThread, 1, CoreSchedScopeProcessGroup, 9999, 10000, 10001, 10002) + assert.Error(t, gotErr) + assert.Equal(t, []uint32{9999, 10002}, gotErrPIDs) + }) +} + +func TestEnableCoreSchedIfSupported(t *testing.T) { + type 
fields struct { + prepareFn func(helper *FileTestUtil) + } + tests := []struct { + name string + fields fields + want bool + want1 string + }{ + { + name: "unsupported since no sched features file", + want: false, + want1: "core sched not supported", + }, + { + name: "unsupported when sched features content is unexpected", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, ``) + }, + }, + want: false, + want1: "core sched not supported", + }, + { + name: "unsupported when sched features content has no core sched", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C`) + }, + }, + want: false, + want1: "core sched not supported", + }, + { + name: "supported when core sched shows in the sysctl", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + sysctlFeaturePath := GetProcSysFilePath(KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") + }, + }, + want: true, + want1: "", + }, + { + name: "supported when core sched disabled in the sysctl but can be enabled", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + sysctlFeaturePath := GetProcSysFilePath(KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "0\n") + }, + }, + want: true, + want1: "", + }, + { + name: "supported when core sched shows in the features", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + }, + }, + want: true, + want1: "", + }, + { + name: "supported when core sched shows in the features 1", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A CORE_SCHED FEATURE_B`) + }, + }, + want: true, + want1: "", + }, + { + name: "supported when sysctl disabled but can be enabled", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + sysctlFeaturePath := GetProcSysFilePath(KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "0\n") + }, + }, + want: true, + want1: "", + }, + { + name: "supported when sched_features disabled but can be enabled", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B NO_CORE_SCHED`) + }, + }, + want: true, + want1: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + got, got1 := EnableCoreSchedIfSupported() + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.want1, got1) + }) + } +} + +func TestSetCoreSchedFeatureEnabled(t *testing.T) { + type fields struct { + prepareFn func(helper *FileTestUtil) + } + tests := []struct { + name string + fields fields + want bool + want1 string + }{ + { + name: "unsupported since no sched features file", + want: false, + want1: "failed to read sched_features", + }, + { + name: "supported when core sched shows in the features", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C CORE_SCHED`) + }, + }, + want: true, + want1: "", + }, + { + name: "supported when core sched shows in the 
features 1", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A CORE_SCHED FEATURE_B`) + }, + }, + want: true, + want1: "", + }, + { + name: "enabled when add sched features for core sched", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + featuresPath := SchedFeatures.Path("") + helper.WriteFileContents(featuresPath, `FEATURE_A FEATURE_B FEATURE_C`) + }, + }, + want: true, + want1: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + got, got1 := SetCoreSchedFeatureEnabled() + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.want1, got1) + }) + } +} diff --git a/pkg/koordlet/util/system/core_sched_unsupported.go b/pkg/koordlet/util/system/core_sched_unsupported.go new file mode 100644 index 000000000..9c1550ecf --- /dev/null +++ b/pkg/koordlet/util/system/core_sched_unsupported.go @@ -0,0 +1,60 @@ +//go:build !linux +// +build !linux + +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import "fmt" + +type CoreSched struct{} + +func NewCoreSched() CoreSchedInterface { + return &CoreSched{} +} + +func NewCoreSchedExtended() CoreSchedExtendedInterface { + return &CoreSched{} +} + +func (c *CoreSched) Lock() {} + +func (c *CoreSched) Unlock() {} + +func (c *CoreSched) Get(pidType CoreSchedScopeType, pid uint32) (uint64, error) { + return 0, fmt.Errorf("unsupported platform") +} + +func (s *CoreSched) Create(pidType CoreSchedScopeType, pid uint32) error { + return fmt.Errorf("unsupported platform") +} + +func (s *CoreSched) ShareTo(pidType CoreSchedScopeType, pid uint32) error { + return fmt.Errorf("unsupported platform") +} + +func (s *CoreSched) ShareFrom(pidType CoreSchedScopeType, pid uint32) error { + return fmt.Errorf("unsupported platform") +} + +func (s *CoreSched) Clear(pidType CoreSchedScopeType, pid ...uint32) ([]uint32, error) { + return nil, fmt.Errorf("unsupported platform") +} + +func (s *CoreSched) Assign(pidTypeFrom CoreSchedScopeType, pidFrom uint32, pidTypeTo CoreSchedScopeType, pidsTo ...uint32) ([]uint32, error) { + return nil, fmt.Errorf("unsupported platform") +} diff --git a/pkg/koordlet/util/system/proc.go b/pkg/koordlet/util/system/proc.go new file mode 100644 index 000000000..6d9fdf370 --- /dev/null +++ b/pkg/koordlet/util/system/proc.go @@ -0,0 +1,181 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + + "k8s.io/klog/v2" +) + +const ( + ProcStatName = "stat" + ProcMemInfoName = "meminfo" + ProcCPUInfoName = "cpuinfo" +) + +func GetProcFilePath(procRelativePath string) string { + return filepath.Join(Conf.ProcRootDir, procRelativePath) +} + +func GetProcRootDir() string { + return Conf.ProcRootDir +} + +// ProcStat is the content of /proc//stat. +// https://manpages.ubuntu.com/manpages/xenial/en/man5/proc.5.html +type ProcStat struct { + Pid uint32 + Comm string + State byte + Ppid uint32 + Pgrp uint32 + // TODO: add more fields if needed +} + +func GetProcPIDStatPath(pid uint32) string { + return filepath.Join(Conf.ProcRootDir, strconv.FormatUint(uint64(pid), 10), ProcStatName) +} + +func ParseProcPIDStat(content string) (*ProcStat, error) { + // pattern: `12345 (stress) S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...` + // splitAfterComm -> "12345 (stress", " S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ..." + splitAfterComm := strings.SplitN(content, ")", 2) + if len(splitAfterComm) != 2 { + return nil, fmt.Errorf("failed to parse stat, err: Comm not found") + } + // comm + // splitBeforeComm -> "12345 ", "stress" + splitBeforeComm := strings.SplitN(splitAfterComm[0], "(", 2) + if len(splitBeforeComm) != 2 { + return nil, fmt.Errorf("failed to parse stat, err: invalid Comm prefix %s", splitAfterComm[0]) + } + stat := &ProcStat{} + stat.Comm = splitBeforeComm[1] + // pid + trimPID := strings.TrimSpace(splitBeforeComm[0]) + pid, err := strconv.ParseUint(trimPID, 10, 32) + if err != nil { + return nil, fmt.Errorf("failed to parse stat, err: invalid pid %s", splitBeforeComm[0]) + } + stat.Pid = uint32(pid) + // fieldsAfterComm -> "S", "12340", "12344", "12340", "12300", "12345", "123450", "151", "0", ... + fieldsAfterComm := strings.Fields(strings.TrimSpace(splitAfterComm[1])) + if len(fieldsAfterComm) < 3 { // remaining fields are ignored + return nil, fmt.Errorf("failed to parse stat, err: suffix fields not enough %s", splitAfterComm[1]) + } + // state + if len(fieldsAfterComm[0]) > 1 { + return nil, fmt.Errorf("failed to parse stat, err: invalid state %s", fieldsAfterComm[0]) + } + stat.State = fieldsAfterComm[0][0] + // ppid + ppid, err := strconv.ParseUint(fieldsAfterComm[1], 10, 32) + if err != nil { + return nil, fmt.Errorf("failed to parse stat, err: invalid ppid %s", fieldsAfterComm[1]) + } + stat.Ppid = uint32(ppid) + // pgrp/pgid + pgrp, err := strconv.ParseUint(fieldsAfterComm[2], 10, 32) + if err != nil { + return nil, fmt.Errorf("failed to parse stat, err: invalid pgrp %s", fieldsAfterComm[2]) + } + stat.Pgrp = uint32(pgrp) + + return stat, nil +} + +func GetPGIDForPID(pid uint32) (uint32, error) { + pidStatPath := GetProcPIDStatPath(pid) + content, err := os.ReadFile(pidStatPath) + if err != nil { + return 0, err + } + stat, err := ParseProcPIDStat(string(content)) + if err != nil { + return 0, err + } + return stat.Pgrp, nil +} + +// GetPGIDsForPIDs gets the PGIDs for a cgroup's PIDs. +// It will consider the PID as PGID if its PGID does not exist anymore. 
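+// The returned PGIDs are deduplicated and sorted in ascending order.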
+func GetPGIDsForPIDs(pids []uint32) ([]uint32, error) { + pidMap := map[uint32]struct{}{} + for _, pid := range pids { + pidMap[pid] = struct{}{} + } + + var pgids []uint32 + pgidMap := map[uint32]struct{}{} + for _, pid := range pids { + // get PGID (pgrp) via /proc/$pid/stat + pgid, err := GetPGIDForPID(pid) + if err != nil { + klog.V(5).Infof("failed to get PGID for pid %v, err: %s", pid, err) + continue + } + + // verify if PGID lives in the pid list + // if not, consider the PID as PGID + _, ok := pidMap[pgid] + if !ok { + klog.V(6).Infof("failed to find PGID %v for pid %v, use pid as PGID", pgid, pid) + pgid = pid + } + + _, ok = pgidMap[pgid] + if ok { + continue + } + + pgidMap[pgid] = struct{}{} + pgids = append(pgids, pgid) + } + + // in ascending order + sort.Slice(pgids, func(i, j int) bool { + return pgids[i] < pgids[j] + }) + + return pgids, nil +} + +func GetContainerPGIDs(containerParentDir string) ([]uint32, error) { + cgroupProcs, err := GetCgroupResource(CPUProcsName) + if err != nil { + return nil, err + } + + cgroupProcsPath := cgroupProcs.Path(containerParentDir) + rawContent, err := os.ReadFile(cgroupProcsPath) + if err != nil { + return nil, err + } + + pids, err := ParseCgroupProcs(string(rawContent)) + if err != nil { + return nil, err + } + + return GetPGIDsForPIDs(pids) +} diff --git a/pkg/koordlet/util/system/proc_test.go b/pkg/koordlet/util/system/proc_test.go new file mode 100644 index 000000000..7c7a8858d --- /dev/null +++ b/pkg/koordlet/util/system/proc_test.go @@ -0,0 +1,297 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package system + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseProcPIDStat(t *testing.T) { + tests := []struct { + name string + arg string + want *ProcStat + wantErr bool + }{ + { + name: "parse failed for empty input", + arg: "", + want: nil, + wantErr: true, + }, + { + name: "parse failed for invalid comm", + arg: `12345 sh S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: nil, + wantErr: true, + }, + { + name: "parse failed for missing pid", + arg: `(stress) S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: nil, + wantErr: true, + }, + { + name: "parse failed for invalid state", + arg: `12345 (stress) unknown 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: nil, + wantErr: true, + }, + { + name: "parse failed for invalid ppid", + arg: `12345 (stress) S -1 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: nil, + wantErr: true, + }, + { + name: "parse failed for invalid pgrp", + arg: `12345 (stress) S 12340 -1 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: nil, + wantErr: true, + }, + { + name: "parse failed for missing fields", + arg: `12345 (stress) S`, + want: nil, + wantErr: true, + }, + { + name: "parse correctly", + arg: `12345 (stress) S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: &ProcStat{ + Pid: 12345, + Comm: "stress", + State: 'S', + Ppid: 12340, + Pgrp: 12344, + }, + wantErr: false, + }, + { + name: "parse correctly 1", + arg: `12345 (sh stress) S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`, + want: &ProcStat{ + Pid: 12345, + Comm: "sh stress", + State: 'S', + Ppid: 12340, + Pgrp: 12344, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, gotErr := ParseProcPIDStat(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestGetPGIDForPID(t *testing.T) { + type fields struct { + prepareFn func(helper *FileTestUtil) + } + tests := []struct { + name string + fields fields + arg uint32 + want uint32 + wantErr bool + }{ + { + name: "get failed for /proc/ not exist", + arg: 12345, + want: 0, + wantErr: true, + }, + { + name: "get pgid failed for /proc//stat parse failed", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + statPath := GetProcPIDStatPath(54321) + helper.WriteFileContents(statPath, `54321 (stress) S 12340 some invalid content ...`) + }, + }, + arg: 54321, + want: 0, + wantErr: true, + }, + { + name: "get pgid correctly", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + statPath := GetProcPIDStatPath(12345) + helper.WriteFileContents(statPath, `12345 (stress) S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`) + }, + }, + arg: 12345, + want: 12344, + wantErr: false, + }, + { + name: "get pgid correctly 1", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + statPath := GetProcPIDStatPath(12345) + helper.WriteFileContents(statPath, `12345 (sh stress) S 12340 12344 12340 12300 12345 123450 151 0 0 0 0 0 ...`) + }, + }, + arg: 12345, + want: 12344, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + got, gotErr := GetPGIDForPID(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestGetContainerPGIDs(t *testing.T) { + type fields struct { + prepareFn 
func(helper *FileTestUtil) + } + tests := []struct { + name string + fields fields + arg string + want []uint32 + wantErr bool + }{ + { + name: "get failed when cgroup.procs not exist", + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + wantErr: true, + }, + { + name: "get failed when parse cgroup.procs failed", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + cgroupProcs, _ := GetCgroupResource(CPUProcsName) + helper.WriteCgroupFileContents("kubepods-pod12345.slice/cri-containerd-container1.scope", cgroupProcs, "12340\n12341\n12342\ninvalid\n") + }, + }, + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + wantErr: true, + }, + { + name: "parse nothing for no valid pid stat", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + cgroupProcs, _ := GetCgroupResource(CPUProcsName) + helper.WriteCgroupFileContents("kubepods-pod12345.slice/cri-containerd-container1.scope", cgroupProcs, "12340\n12342\n12345\n") + }, + }, + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + want: nil, + wantErr: false, + }, + { + name: "parse correctly", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + cgroupProcs, _ := GetCgroupResource(CPUProcsName) + helper.WriteCgroupFileContents("kubepods-pod12345.slice/cri-containerd-container1.scope", cgroupProcs, "12340\n12342\n12345\n") + helper.WriteProcSubFileContents("12340/stat", `12340 (bash) S 12340 12340 12340 12300 12340 123400 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12342/stat", `12342 (stress) S 12340 12340 12340 12300 12342 123450 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12345/stat", `12345 (stress) S 12342 12340 12340 12300 12345 123450 151 0 0 0 0 0 ...`) + }, + }, + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + want: []uint32{ + 12340, + }, + wantErr: false, + }, + { + name: "parse correctly 1", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + cgroupProcs, _ := GetCgroupResource(CPUProcsName) + helper.WriteCgroupFileContents("kubepods-pod12345.slice/cri-containerd-container1.scope", cgroupProcs, "12340\n12342\n12345\n") + helper.WriteProcSubFileContents("12340/stat", `12340 (bash) S 12340 12340 12340 12300 12340 123400 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12342/stat", `12342 (stress) S 12340 12342 12342 12300 12342 123450 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12345/stat", `12345 (stress) S 12342 12342 12342 12300 12345 123450 151 0 0 0 0 0 ...`) + }, + }, + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + want: []uint32{ + 12340, + 12342, + }, + wantErr: false, + }, + { + name: "parse correctly ignoring non-exist pids", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + cgroupProcs, _ := GetCgroupResource(CPUProcsName) + helper.WriteCgroupFileContents("kubepods-pod12345.slice/cri-containerd-container1.scope", cgroupProcs, "12340\n12342\n12345\n") + helper.WriteProcSubFileContents("12340/stat", `12340 (bash) S 12340 12340 12340 12300 12340 123400 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12342/stat", `12342 (stress) S 12340 12340 12340 12300 12342 123450 151 0 0 0 0 0 ...`) + }, + }, + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + want: []uint32{ + 12340, + }, + wantErr: false, + }, + { + name: "consider pid as PGID if PGID not exist", + fields: fields{ + prepareFn: func(helper *FileTestUtil) { + cgroupProcs, _ := GetCgroupResource(CPUProcsName) + 
helper.WriteCgroupFileContents("kubepods-pod12345.slice/cri-containerd-container1.scope", cgroupProcs, "12340\n12342\n12345\n") + helper.WriteProcSubFileContents("12340/stat", `12340 (bash) S 12340 12340 12340 12300 12340 123400 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12342/stat", `12342 (stress) S 12340 12340 12340 12300 12342 123450 151 0 0 0 0 0 ...`) + helper.WriteProcSubFileContents("12345/stat", `12345 (sleep) S 12340 12344 12344 12340 12345 123460 200 0 0 0 0 0 ...`) + }, + }, + arg: "kubepods-pod12345.slice/cri-containerd-container1.scope", + want: []uint32{ + 12340, + 12345, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + got, gotErr := GetContainerPGIDs(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/koordlet/util/system/system_file.go b/pkg/koordlet/util/system/system_file.go index 3e47eab89..be513aa2d 100644 --- a/pkg/koordlet/util/system/system_file.go +++ b/pkg/koordlet/util/system/system_file.go @@ -31,15 +31,13 @@ import ( ) const ( - ProcStatName = "stat" - ProcMemInfoName = "meminfo" SysctlSubDir = "sys" - ProcCPUInfoName = "cpuinfo" KernelCmdlineFileName = "cmdline" HugepageDir = "hugepages" nrPath = "nr_hugepages" KernelSchedGroupIdentityEnable = "kernel/sched_group_identity_enabled" + KernelSchedCore = "kernel/sched_core" SysNUMASubDir = "bus/node/devices" SysPCIDeviceDir = "bus/pci/devices" @@ -85,14 +83,6 @@ func GetPeriodTicks(start, end time.Time) float64 { return float64(end.Sub(start)) / Jiffies } -func GetProcFilePath(procRelativePath string) string { - return filepath.Join(Conf.ProcRootDir, procRelativePath) -} - -func GetProcRootDir() string { - return Conf.ProcRootDir -} - func GetSysRootDir() string { return Conf.SysRootDir } @@ -179,3 +169,36 @@ func SetSchedGroupIdentity(enable bool) error { klog.V(4).Infof("SetSchedGroupIdentity set sysctl config successfully, value %v", v) return nil } + +func GetSchedCore() (bool, error) { + s := NewProcSysctl() + // 0: disabled; 1: enabled + cur, err := s.GetSysctl(KernelSchedCore) + if err != nil { + return false, fmt.Errorf("cannot get sysctl sched core, err: %w", err) + } + return cur == 1, nil +} + +func SetSchedCore(enable bool) error { + s := NewProcSysctl() + cur, err := s.GetSysctl(KernelSchedCore) + if err != nil { + return fmt.Errorf("cannot get sysctl sched core, err: %w", err) + } + v := 0 // 0: disabled; 1: enabled + if enable { + v = 1 + } + if cur == v { + klog.V(6).Infof("SetSchedCore skips since current sysctl config is already %v", enable) + return nil + } + + err = s.SetSysctl(KernelSchedCore, v) + if err != nil { + return fmt.Errorf("cannot set sysctl sched core, err: %w", err) + } + klog.V(4).Infof("SetSchedCore set sysctl config successfully, value %v", v) + return nil +} diff --git a/pkg/koordlet/util/system/system_file_test.go b/pkg/koordlet/util/system/system_file_test.go index 3ff47c05d..5b61bf5c9 100644 --- a/pkg/koordlet/util/system/system_file_test.go +++ b/pkg/koordlet/util/system/system_file_test.go @@ -76,3 +76,31 @@ func TestSetSchedGroupIdentity(t *testing.T) { assert.Equal(t, got, testContent) }) } + +func TestSetSchedCore(t *testing.T) { + t.Run("test", func(t *testing.T) { + helper := NewFileTestUtil(t) + + // system not supported + err := SetSchedCore(false) + assert.Error(t, err) + + // system supported, already disabled + 
testProcSysFile := KernelSchedCore + testProcSysFilepath := filepath.Join(SysctlSubDir, testProcSysFile) + testContent := "0" + assert.False(t, FileExists(GetProcSysFilePath(testProcSysFile))) + helper.WriteProcSubFileContents(testProcSysFilepath, testContent) + err = SetSchedCore(false) + assert.NoError(t, err) + got := helper.ReadProcSubFileContents(testProcSysFilepath) + assert.Equal(t, got, testContent) + + // system supported, set enabled + testContent = "1" + err = SetSchedCore(true) + assert.NoError(t, err) + got = helper.ReadProcSubFileContents(testProcSysFilepath) + assert.Equal(t, got, testContent) + }) +} diff --git a/pkg/koordlet/util/system/system_resource.go b/pkg/koordlet/util/system/system_resource.go index 59e8972a7..2efe7a4d0 100644 --- a/pkg/koordlet/util/system/system_resource.go +++ b/pkg/koordlet/util/system/system_resource.go @@ -24,15 +24,17 @@ import ( ) const ( - ProcSysVmRelativePath = "sys/vm/" - MemcgReaperRelativePath = "kernel/mm/memcg_reaper/" - KidledRelativePath = "kernel/mm/kidled/" + ProcSysVmRelativePath = "sys/vm/" + MemcgReaperRelativePath = "kernel/mm/memcg_reaper/" + KidledRelativePath = "kernel/mm/kidled/" + SchedFeaturesRelativePath = "kernel/debug/" MinFreeKbytesFileName = "min_free_kbytes" WatermarkScaleFactorFileName = "watermark_scale_factor" MemcgReapBackGroundFileName = "reap_background" KidledScanPeriodInSecondsFileName = "scan_period_in_seconds" KidledUseHierarchyFileFileName = "use_hierarchy" + SchedFeaturesFileName = "sched_features" ) var ( @@ -49,6 +51,8 @@ var ( MemcgReapBackGround = NewCommonSystemResource(MemcgReaperRelativePath, MemcgReapBackGroundFileName, GetSysRootDir).WithValidator(MemcgReapBackGroundValidator).WithCheckSupported(SupportedIfFileExists) KidledScanPeriodInSeconds = NewCommonSystemResource(KidledRelativePath, KidledScanPeriodInSecondsFileName, GetSysRootDir).WithValidator(KidledScanPeriodInSecondsValidator).WithCheckSupported(SupportedIfFileExists) KidledUseHierarchy = NewCommonSystemResource(KidledRelativePath, KidledUseHierarchyFileFileName, GetSysRootDir).WithValidator(KidledUseHierarchyValidator).WithCheckSupported(SupportedIfFileExists) + // SchedFeatures is the system file which shows the enabled features of the kernel scheduling. + SchedFeatures = NewCommonSystemResource(SchedFeaturesRelativePath, SchedFeaturesFileName, GetSysRootDir).WithCheckSupported(SupportedIfFileExists) ) var _ Resource = &SystemResource{} diff --git a/pkg/koordlet/util/system/util_test_tool.go b/pkg/koordlet/util/system/util_test_tool.go index 2693772bd..f5f80957a 100644 --- a/pkg/koordlet/util/system/util_test_tool.go +++ b/pkg/koordlet/util/system/util_test_tool.go @@ -24,8 +24,6 @@ import ( "strings" "testing" - "k8s.io/klog/v2" - "github.com/stretchr/testify/assert" ) @@ -60,13 +58,15 @@ type FileTestUtil struct { TempDir string // whether to validate when writing cgroups resources ValidateResource bool + // additional cleanup function for Config to be invoked in Cleanup() + CleanupFn func(config *Config) - t *testing.T + t testing.TB } // NewFileTestUtil creates a new test util for the specified subsystem. // NOTE: this function should be called only for testing purposes. -func NewFileTestUtil(t *testing.T) *FileTestUtil { +func NewFileTestUtil(t testing.TB) *FileTestUtil { // NOTE: When $TMPDIR is not set, `t.TempDir()` can use different base directory on Mac OS X and Linux, which may // generates too long paths to test unix socket. 
t.Setenv("TMPDIR", "/tmp") @@ -96,6 +96,9 @@ func (c *FileTestUtil) Cleanup() { assert.NoError(c.t, err) } initCgroupsVersion() + if c.CleanupFn != nil { + c.CleanupFn(Conf) + } } func (c *FileTestUtil) SetResourcesSupported(supported bool, resources ...Resource) { @@ -116,6 +119,11 @@ func (c *FileTestUtil) SetValidateResource(enabled bool) { c.ValidateResource = enabled } +func (c *FileTestUtil) SetConf(setFn, cleanupFn func(conf *Config)) { + setFn(Conf) + c.CleanupFn = cleanupFn +} + // if dir contain TempDir, mkdir direct, else join with TempDir and mkdir func (c *FileTestUtil) MkDirAll(testDir string) { dir := testDir @@ -236,7 +244,7 @@ func (c *FileTestUtil) WriteCgroupFileContents(taskDir string, r Resource, conte } } filePath = r.Path(taskDir) - klog.V(5).Infof("write %s [%s]", filePath, contents) + c.t.Logf("write %s [%s]", filePath, contents) err := os.WriteFile(filePath, []byte(contents), 0644) if err != nil { diff --git a/pkg/util/metrics/expire_metric.go b/pkg/util/metrics/expire_metric.go index 47029b1d2..53519fffa 100644 --- a/pkg/util/metrics/expire_metric.go +++ b/pkg/util/metrics/expire_metric.go @@ -17,6 +17,7 @@ limitations under the License. package metrics import ( + "fmt" "sort" "strings" "sync" @@ -57,6 +58,11 @@ func (g *GCGaugeVec) WithSet(labels prometheus.Labels, value float64) { g.expireStatus.UpdateStatus(g.name, labels) } +func (g *GCGaugeVec) Delete(labels prometheus.Labels) { + g.vec.Delete(labels) + g.expireStatus.RemoveStatus(g.name, labels) +} + type GCCounterVec struct { name string vec *prometheus.CounterVec @@ -81,11 +87,18 @@ func (g *GCCounterVec) WithInc(labels prometheus.Labels) { g.expireStatus.UpdateStatus(g.name, labels) } +func (g *GCCounterVec) Delete(labels prometheus.Labels) { + g.vec.Delete(labels) + g.expireStatus.RemoveStatus(g.name, labels) +} + type MetricVecGC interface { // Len returns the length of the alive metric statuses. Len() int // UpdateStatus updates the metric status with the given label values and timestamp (Unix seconds). UpdateStatus(updateTime int64, labels prometheus.Labels) + // RemoveStatus removes the metric status with the given label values. + RemoveStatus(labels prometheus.Labels) // ExpireMetrics expires all metric statuses which are updated before the expired time (Unix seconds). 
ExpireMetrics(expireTime int64) int } @@ -118,18 +131,31 @@ func (v *metricVecGC) Len() int { } func (v *metricVecGC) UpdateStatus(updateTime int64, labels prometheus.Labels) { + statusKey := labelsToKey(labels) + status := &metricStatus{ + Labels: labels, + lastUpdatedUnix: updateTime, + } + v.lock.Lock() defer v.lock.Unlock() - v.updateStatus(updateTime, labels) + v.updateStatus(statusKey, status) } -func (v *metricVecGC) updateStatus(updateTime int64, labels prometheus.Labels) { +func (v *metricVecGC) updateStatus(statusKey string, status *metricStatus) { + v.statuses[statusKey] = *status +} + +func (v *metricVecGC) RemoveStatus(labels prometheus.Labels) { statusKey := labelsToKey(labels) - status := metricStatus{ - Labels: labels, - lastUpdatedUnix: updateTime, - } - v.statuses[statusKey] = status + + v.lock.Lock() + defer v.lock.Unlock() + v.removeStatus(statusKey) +} + +func (v *metricVecGC) removeStatus(statusKey string) { + delete(v.statuses, statusKey) } func (v *metricVecGC) ExpireMetrics(expireTime int64) int { @@ -138,7 +164,7 @@ func (v *metricVecGC) ExpireMetrics(expireTime int64) int { count := 0 for key, status := range v.statuses { if status.lastUpdatedUnix < expireTime { - delete(v.statuses, key) + v.removeStatus(key) v.metricVec.Delete(status.Labels) count++ klog.V(6).Infof("metricVecGC %s delete metric, key %s, updateTime %v, expireTime %v", @@ -156,6 +182,8 @@ type MetricGC interface { Stop() AddMetric(name string, metric *prometheus.MetricVec) UpdateStatus(name string, labels prometheus.Labels) + RemoveStatus(name string, labels prometheus.Labels) + CountStatus(name string) int } type metricGC struct { @@ -185,6 +213,10 @@ func NewMetricGC(expireTime time.Duration, interval time.Duration) MetricGC { func (e *metricGC) AddMetric(metricName string, metric *prometheus.MetricVec) { e.globalLock.Lock() defer e.globalLock.Unlock() + e.addMetric(metricName, metric) +} + +func (e *metricGC) addMetric(metricName string, metric *prometheus.MetricVec) { vecGC := NewMetricVecGC(metricName, metric) e.metrics[metricName] = vecGC } @@ -217,22 +249,54 @@ func (e *metricGC) Stop() { func (e *metricGC) UpdateStatus(metricName string, labels prometheus.Labels) { e.globalLock.RLock() - defer e.globalLock.RUnlock() // different metric vectors can update simultaneously - e.updateStatus(time.Now().Unix(), metricName, labels) + err := e.updateStatus(time.Now().Unix(), metricName, labels) + e.globalLock.RUnlock() + if err != nil { + klog.Errorf("failed to update status for metric %s, err: %s", metricName, err) + } } -func (e *metricGC) updateStatus(updateTime int64, metricName string, labels prometheus.Labels) { - if metric := e.metrics[metricName]; metric != nil { - metric.UpdateStatus(updateTime, labels) - } else { - klog.Errorf("metric %v not correctly added", metricName) +func (e *metricGC) updateStatus(updateTime int64, metricName string, labels prometheus.Labels) error { + metric, ok := e.metrics[metricName] + if !ok { + return fmt.Errorf("metric not correctly added") + } + metric.UpdateStatus(updateTime, labels) + return nil +} + +func (e *metricGC) RemoveStatus(metricName string, labels prometheus.Labels) { + e.globalLock.RLock() + err := e.removeStatus(metricName, labels) + e.globalLock.RUnlock() + if err != nil { + klog.Errorf("failed to remove status for metric %s, err: %s", metricName, err) + } +} + +func (e *metricGC) removeStatus(metricName string, labels prometheus.Labels) error { + metric, ok := e.metrics[metricName] + if !ok { + return fmt.Errorf("metric not correctly added") + } +
metric.RemoveStatus(labels) + return nil +} + +func (e *metricGC) CountStatus(metricName string) int { + e.globalLock.RLock() + defer e.globalLock.RUnlock() + metric, ok := e.metrics[metricName] + if !ok { + return 0 + } + return metric.Len() } func (e *metricGC) statusLen() int { - e.globalLock.Lock() - defer e.globalLock.Unlock() + e.globalLock.RLock() + defer e.globalLock.RUnlock() statusLen := 0 for _, metric := range e.metrics { statusLen += metric.Len() @@ -241,8 +305,8 @@ func (e *metricGC) statusLen() int { } func (e *metricGC) expire() error { - e.globalLock.Lock() - defer e.globalLock.Unlock() + e.globalLock.RLock() + defer e.globalLock.RUnlock() expireTime := time.Now().Unix() - int64(e.expireTime/time.Second) count := 0 for _, metric := range e.metrics { diff --git a/pkg/util/metrics/expire_metric_test.go b/pkg/util/metrics/expire_metric_test.go index a4d41869f..f4c8d537a 100644 --- a/pkg/util/metrics/expire_metric_test.go +++ b/pkg/util/metrics/expire_metric_test.go @@ -34,7 +34,7 @@ type testMetric struct { const testSubsystem = "test" -func Test_GCGaugeVec_WithSet(t *testing.T) { +func Test_GCGaugeVec(t *testing.T) { metricName := "test_gauge" vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: testSubsystem, @@ -54,20 +54,29 @@ func Test_GCGaugeVec_WithSet(t *testing.T) { testGaugeVec.WithSet(pod1Labels, 1) ms := collectMetrics(vec) assert.Equal(t, 1, len(ms), "checkMetricsNum") + assert.Equal(t, 1, testGaugeVec.expireStatus.CountStatus(metricName), "checkStatusNum") //add metric2 pod2Labels := prometheus.Labels{"node": "node2", "pod_name": "pod2", "pod_namespace": "ns2"} testGaugeVec.WithSet(pod2Labels, 2) ms = collectMetrics(vec) assert.Equal(t, 2, len(ms), "checkMetricsNum") + assert.Equal(t, 2, testGaugeVec.expireStatus.CountStatus(metricName), "checkStatusNum") //update metric1 testGaugeVec.WithSet(pod1Labels, 3) ms = collectMetrics(vec) assert.Equal(t, 2, len(ms), "checkMetricsNum") + assert.Equal(t, 2, testGaugeVec.expireStatus.CountStatus(metricName), "checkStatusNum") + + // delete metric1 + testGaugeVec.Delete(pod1Labels) + ms = collectMetrics(vec) + assert.Equal(t, 1, len(ms), "checkMetricsNum") + assert.Equal(t, 1, testGaugeVec.expireStatus.CountStatus(metricName), "checkStatusNum") } -func Test_GCCounterVec_WithInc(t *testing.T) { +func Test_GCCounterVec(t *testing.T) { metricName := "test_counter" vec := prometheus.NewCounterVec(prometheus.CounterOpts{ Subsystem: testSubsystem, @@ -87,17 +96,26 @@ func Test_GCCounterVec_WithInc(t *testing.T) { testCounterVec.WithInc(pod1Labels) ms := collectMetrics(vec) assert.Equal(t, 1, len(ms), "checkMetricsNum") + assert.Equal(t, 1, testCounterVec.expireStatus.CountStatus(metricName), "checkStatusNum") //add metric2 pod2Labels := prometheus.Labels{"node": "node2", "pod_name": "pod2", "pod_namespace": "ns2"} testCounterVec.WithInc(pod2Labels) ms = collectMetrics(vec) assert.Equal(t, 2, len(ms), "checkMetricsNum") + assert.Equal(t, 2, testCounterVec.expireStatus.CountStatus(metricName), "checkStatusNum") //update metric1 testCounterVec.WithInc(pod1Labels) ms = collectMetrics(vec) assert.Equal(t, 2, len(ms), "checkMetricsNum") + assert.Equal(t, 2, testCounterVec.expireStatus.CountStatus(metricName), "checkStatusNum") + + // delete metric1 + testCounterVec.Delete(pod1Labels) + ms = collectMetrics(vec) + assert.Equal(t, 1, len(ms), "checkMetricsNum") + assert.Equal(t, 1, testCounterVec.expireStatus.CountStatus(metricName), "checkStatusNum") } func Test_MetricGC_GC(t *testing.T) { @@ -124,7 +142,8 @@ func 
Test_MetricGC_GC(t *testing.T) { metricsUpdate := generatePodMetrics(5, time.Now().Unix()-int64(DefaultExpireTime/time.Second)) for _, m := range metricsUpdate { gcGaugeVec.WithSet(m.labels, m.value) - testMetricGC.updateStatus(m.updateTime, metricName, m.labels) + err := testMetricGC.updateStatus(m.updateTime, metricName, m.labels) + assert.NoError(t, err) } time.Sleep(10 * time.Millisecond) gotMetrics = collectMetrics(gaugeVec) diff --git a/pkg/util/sloconfig/nodeslo_config.go b/pkg/util/sloconfig/nodeslo_config.go index de746152b..d653cc761 100644 --- a/pkg/util/sloconfig/nodeslo_config.go +++ b/pkg/util/sloconfig/nodeslo_config.go @@ -76,8 +76,12 @@ func DefaultCPUQOS(qos apiext.QoSClass) *slov1alpha1.CPUQOS { case apiext.QoSBE: cpuQOS = &slov1alpha1.CPUQOS{ GroupIdentity: pointer.Int64(-1), - SchedIdle: pointer.Int64(1), - CoreExpeller: pointer.Bool(false), + // NOTE: Be careful to enable CPU Idle since it overrides and lock the cpu.shares/cpu.weight of the same + // cgroup to a minimal value. This can affect other components like Kubelet which wants to write + // cpu.shares/cpu.weight to other values. + // https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=304000390f88d049c85e9a0958ac5567f38816ee + SchedIdle: pointer.Int64(0), + CoreExpeller: pointer.Bool(false), } case apiext.QoSSystem: cpuQOS = &slov1alpha1.CPUQOS{