From 535f5296a13ad64cd9f4d2d50095626b30dc9474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Thu, 11 Jul 2024 23:32:02 +0200 Subject: [PATCH] DRA: sanitize Pool names while sanitizing pod- and node- scoped DRA objects --- cluster-autoscaler/core/utils/utils.go | 13 +++++++------ cluster-autoscaler/core/utils/utils_test.go | 5 +++-- cluster-autoscaler/dynamicresources/sanitize.go | 17 +++++++++++++++-- .../mixed_nodeinfos_processor.go | 6 ++++-- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/cluster-autoscaler/core/utils/utils.go b/cluster-autoscaler/core/utils/utils.go index 96ce8c3b2a77..139959995ad9 100644 --- a/cluster-autoscaler/core/utils/utils.go +++ b/cluster-autoscaler/core/utils/utils.go @@ -46,8 +46,10 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap labels.UpdateDeprecatedLabels(baseNodeInfo.Node().ObjectMeta.Labels) + randSuffix := fmt.Sprintf("%d", rand.Int63()) + // Deep copy and sanitize the template node and the associated DRA objects returned from the cloud provider - sanitizedNode, typedErr := SanitizeNode(clustersnapshot.NodeResourceInfo{Node: baseNodeInfo.Node(), DynamicResources: baseNodeInfo.DynamicResources()}, id, taintConfig) + sanitizedNode, typedErr := SanitizeNode(clustersnapshot.NodeResourceInfo{Node: baseNodeInfo.Node(), DynamicResources: baseNodeInfo.DynamicResources()}, id, taintConfig, randSuffix) if err != nil { return nil, typedErr } @@ -67,7 +69,7 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap startupPods = append(startupPods, clustersnapshot.PodResourceInfo{Pod: podInfo.Pod, DynamicResourceRequests: podInfo.DynamicResourceRequests}) } // Deep copy and sanitize the startup Pods and the associated DRA objects into fakes pointing to the fake sanitized Node - sanitizedStartupPods := SanitizePods(startupPods, sanitizedNode.Node.Name, fmt.Sprintf("%d", rand.Int63())) + sanitizedStartupPods := SanitizePods(startupPods, sanitizedNode.Node.Name, randSuffix) // Build the final node info with all 3 parts (Node, Pods, DRA objects) sanitized and in sync. return clustersnapshot.NewNodeInfo(sanitizedNode, sanitizedStartupPods), nil @@ -110,10 +112,9 @@ func DeepCopyNodeInfo(nodeInfo *schedulerframework.NodeInfo) *schedulerframework } // SanitizeNode cleans up nodes used for node group templates -func SanitizeNode(nodeResInfo clustersnapshot.NodeResourceInfo, nodeGroup string, taintConfig taints.TaintConfig) (clustersnapshot.NodeResourceInfo, errors.AutoscalerError) { +func SanitizeNode(nodeResInfo clustersnapshot.NodeResourceInfo, nodeGroup string, taintConfig taints.TaintConfig, nameSuffix string) (clustersnapshot.NodeResourceInfo, errors.AutoscalerError) { newNode := nodeResInfo.Node.DeepCopy() - randSuffix := fmt.Sprintf("%d", rand.Int63()) - nodeName := fmt.Sprintf("template-node-for-%s-%s", nodeGroup, randSuffix) + nodeName := fmt.Sprintf("template-node-for-%s-%s", nodeGroup, nameSuffix) newNode.Labels = make(map[string]string, len(nodeResInfo.Node.Labels)) for k, v := range nodeResInfo.Node.Labels { if k != apiv1.LabelHostname { @@ -124,7 +125,7 @@ func SanitizeNode(nodeResInfo clustersnapshot.NodeResourceInfo, nodeGroup string } newNode.Name = nodeName newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, taintConfig) - newDynamicResources := dynamicresources.SanitizedNodeDynamicResources(nodeResInfo.DynamicResources, newNode.Name, randSuffix) + newDynamicResources := dynamicresources.SanitizedNodeDynamicResources(nodeResInfo.DynamicResources, newNode.Name, nameSuffix) return clustersnapshot.NodeResourceInfo{Node: newNode, DynamicResources: newDynamicResources}, nil } diff --git a/cluster-autoscaler/core/utils/utils_test.go b/cluster-autoscaler/core/utils/utils_test.go index 9ccfe7c39b10..66bde0f6ae7f 100644 --- a/cluster-autoscaler/core/utils/utils_test.go +++ b/cluster-autoscaler/core/utils/utils_test.go @@ -17,11 +17,12 @@ limitations under the License. package utils import ( - "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "strings" "testing" "time" + "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" + "k8s.io/autoscaler/cluster-autoscaler/utils/taints" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -56,7 +57,7 @@ func TestSanitizeLabels(t *testing.T) { apiv1.LabelHostname: "abc", "x": "y", } - node, err := SanitizeNode(oldNode, "bzium", taints.TaintConfig{}) + node, err := SanitizeNode(oldNode, "bzium", taints.TaintConfig{}, "abc") assert.NoError(t, err) assert.NotEqual(t, node.Node.Labels[apiv1.LabelHostname], "abc", nil) assert.Equal(t, node.Node.Labels["x"], "y") diff --git a/cluster-autoscaler/dynamicresources/sanitize.go b/cluster-autoscaler/dynamicresources/sanitize.go index 4f0c15d0379e..da01ff84f7c6 100644 --- a/cluster-autoscaler/dynamicresources/sanitize.go +++ b/cluster-autoscaler/dynamicresources/sanitize.go @@ -3,6 +3,7 @@ package dynamicresources import ( "fmt" + resourceapi "k8s.io/api/resource/v1alpha3" "k8s.io/apimachinery/pkg/util/uuid" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) @@ -11,6 +12,7 @@ import ( // - NodeName pointers in all DRA objects are updated to the provided nodeName. // - Names of all DRA objects get the provided nameSuffix appended. // - UIDs of all DRA objects are randomized. +// - Names of all Pools that the slices belong to get the provided nameSuffix appended. // // This needs to be done anytime we want to add a "copy" of some Node (and so also of its NodeDynamicResources) // to ClusterSnapshot. @@ -18,6 +20,7 @@ func SanitizedNodeDynamicResources(ndr schedulerframework.NodeDynamicResources, sanitizedNdr := ndr.DeepCopy() for _, slice := range sanitizedNdr.ResourceSlices { slice.Name = fmt.Sprintf("%s-%s", slice.Name, nameSuffix) + slice.Spec.Pool.Name = fmt.Sprintf("%s-%s", slice.Spec.Pool.Name, nameSuffix) slice.UID = uuid.NewUUID() slice.Spec.NodeName = nodeName } @@ -25,8 +28,10 @@ func SanitizedNodeDynamicResources(ndr schedulerframework.NodeDynamicResources, } // SanitizedPodDynamicResourceRequests returns a deep copy of the provided PodDynamicResourceRequests where: -// - Names of all DRA objects get the provided nameSuffix appended. -// - UIDs of all DRA objects are randomized. +// - Names of all DRA objects get the provided nameSuffix appended. +// - UIDs of all DRA objects are randomized. +// - Names of all Pools in the allocation results get the provided nameSuffix appended. The suffix should match +// the suffix used while sanitizing Pools in ResourceSlices of its Node. // // This needs to be done anytime we want to add a "copy" of some Pod (and so also of its PodDynamicResourceRequests) // to ClusterSnapshot. @@ -35,6 +40,14 @@ func SanitizedPodDynamicResourceRequests(pdr schedulerframework.PodDynamicResour for _, claim := range sanitizedPdr.ResourceClaims { claim.Name = fmt.Sprintf("%s-%s", claim.Name, nameSuffix) claim.UID = uuid.NewUUID() + + var sanitizedAllocations []resourceapi.DeviceRequestAllocationResult + for _, devAlloc := range claim.Status.Allocation.Devices.Results { + devAlloc.Pool = fmt.Sprintf("%s-%s", devAlloc.Pool, nameSuffix) + sanitizedAllocations = append(sanitizedAllocations, devAlloc) + } + + claim.Status.Allocation.Devices.Results = sanitizedAllocations } return sanitizedPdr } diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go index 4f20c09c3072..9d6c547b7894 100644 --- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go +++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go @@ -104,14 +104,16 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext, return false, "", err } + randSuffix := fmt.Sprintf("%d", rand.Int63()) + // Deep copy and sanitize a real Node (and its associated DRA objects) into a fake - sanitizedNode, err := utils.SanitizeNode(clustersnapshot.NewNodeResourceInfo(node, ctx.ClusterSnapshot.DraObjectsSource), id, taintConfig) + sanitizedNode, err := utils.SanitizeNode(clustersnapshot.NewNodeResourceInfo(node, ctx.ClusterSnapshot.DraObjectsSource), id, taintConfig, randSuffix) if err != nil { return false, "", err } // Deep copy and sanitize the startup Pods (and their associated DRA objects) into fakes pointing to the fake sanitized Node. - sanitizedStartupPods := utils.SanitizePods(startupPods, sanitizedNode.Node.Name, fmt.Sprintf("%d", rand.Int63())) + sanitizedStartupPods := utils.SanitizePods(startupPods, sanitizedNode.Node.Name, randSuffix) // Build the final node info with all 3 parts (Node, Pods, DRA objects) sanitized and in sync. result[id] = clustersnapshot.NewNodeInfo(sanitizedNode, sanitizedStartupPods)