DRA: sanitize Pool names while sanitizing pod- and node-scoped DRA objects
towca committed Jul 11, 2024
1 parent 09e0a24 commit 535f529
Showing 4 changed files with 29 additions and 12 deletions.
13 changes: 7 additions & 6 deletions cluster-autoscaler/core/utils/utils.go
@@ -46,8 +46,10 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap

 	labels.UpdateDeprecatedLabels(baseNodeInfo.Node().ObjectMeta.Labels)
 
+	randSuffix := fmt.Sprintf("%d", rand.Int63())
+
 	// Deep copy and sanitize the template node and the associated DRA objects returned from the cloud provider
-	sanitizedNode, typedErr := SanitizeNode(clustersnapshot.NodeResourceInfo{Node: baseNodeInfo.Node(), DynamicResources: baseNodeInfo.DynamicResources()}, id, taintConfig)
+	sanitizedNode, typedErr := SanitizeNode(clustersnapshot.NodeResourceInfo{Node: baseNodeInfo.Node(), DynamicResources: baseNodeInfo.DynamicResources()}, id, taintConfig, randSuffix)
 	if err != nil {
 		return nil, typedErr
 	}
@@ -67,7 +69,7 @@ func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*ap
 		startupPods = append(startupPods, clustersnapshot.PodResourceInfo{Pod: podInfo.Pod, DynamicResourceRequests: podInfo.DynamicResourceRequests})
 	}
 	// Deep copy and sanitize the startup Pods and the associated DRA objects into fakes pointing to the fake sanitized Node
-	sanitizedStartupPods := SanitizePods(startupPods, sanitizedNode.Node.Name, fmt.Sprintf("%d", rand.Int63()))
+	sanitizedStartupPods := SanitizePods(startupPods, sanitizedNode.Node.Name, randSuffix)
 
 	// Build the final node info with all 3 parts (Node, Pods, DRA objects) sanitized and in sync.
 	return clustersnapshot.NewNodeInfo(sanitizedNode, sanitizedStartupPods), nil
@@ -110,10 +112,9 @@ func DeepCopyNodeInfo(nodeInfo *schedulerframework.NodeInfo) *schedulerframework
 }
 
 // SanitizeNode cleans up nodes used for node group templates
-func SanitizeNode(nodeResInfo clustersnapshot.NodeResourceInfo, nodeGroup string, taintConfig taints.TaintConfig) (clustersnapshot.NodeResourceInfo, errors.AutoscalerError) {
+func SanitizeNode(nodeResInfo clustersnapshot.NodeResourceInfo, nodeGroup string, taintConfig taints.TaintConfig, nameSuffix string) (clustersnapshot.NodeResourceInfo, errors.AutoscalerError) {
 	newNode := nodeResInfo.Node.DeepCopy()
-	randSuffix := fmt.Sprintf("%d", rand.Int63())
-	nodeName := fmt.Sprintf("template-node-for-%s-%s", nodeGroup, randSuffix)
+	nodeName := fmt.Sprintf("template-node-for-%s-%s", nodeGroup, nameSuffix)
 	newNode.Labels = make(map[string]string, len(nodeResInfo.Node.Labels))
 	for k, v := range nodeResInfo.Node.Labels {
 		if k != apiv1.LabelHostname {
@@ -124,7 +125,7 @@ func SanitizeNode(nodeResInfo clustersnapshot.NodeResourceInfo, nodeGroup string
 	}
 	newNode.Name = nodeName
 	newNode.Spec.Taints = taints.SanitizeTaints(newNode.Spec.Taints, taintConfig)
-	newDynamicResources := dynamicresources.SanitizedNodeDynamicResources(nodeResInfo.DynamicResources, newNode.Name, randSuffix)
+	newDynamicResources := dynamicresources.SanitizedNodeDynamicResources(nodeResInfo.DynamicResources, newNode.Name, nameSuffix)
 	return clustersnapshot.NodeResourceInfo{Node: newNode, DynamicResources: newDynamicResources}, nil
 }

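For illustration, here is a minimal standalone sketch (not part of the commit) of the naming scheme the shared suffix produces; the node-group ID, slice name, and pool name are hypothetical:

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	// One suffix is generated per template and shared by the Node, its
	// ResourceSlices, and their Pools, so all the sanitized names stay in sync.
	suffix := fmt.Sprintf("%d", rand.Int63())
	fmt.Println(fmt.Sprintf("template-node-for-%s-%s", "my-node-group", suffix)) // Node name
	fmt.Println(fmt.Sprintf("%s-%s", "gpu-slice", suffix))                       // ResourceSlice name
	fmt.Println(fmt.Sprintf("%s-%s", "gpu-pool", suffix))                        // Pool name
}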
5 changes: 3 additions & 2 deletions cluster-autoscaler/core/utils/utils_test.go
@@ -17,11 +17,12 @@ limitations under the License.
 package utils
 
 import (
-	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
 	"strings"
 	"testing"
 	"time"
 
+	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
+
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
 
@@ -56,7 +57,7 @@ func TestSanitizeLabels(t *testing.T) {
 		apiv1.LabelHostname: "abc",
 		"x":                 "y",
 	}
-	node, err := SanitizeNode(oldNode, "bzium", taints.TaintConfig{})
+	node, err := SanitizeNode(oldNode, "bzium", taints.TaintConfig{}, "abc")
 	assert.NoError(t, err)
 	assert.NotEqual(t, node.Node.Labels[apiv1.LabelHostname], "abc", nil)
 	assert.Equal(t, node.Node.Labels["x"], "y")
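Because the suffix is now supplied by the caller, the sanitized node name is deterministic for a given input. A hypothetical extra assertion (not part of the commit) that would pin down the template-node-for-<group>-<suffix> format from utils.go:

	// With nodeGroup "bzium" and suffix "abc", the sanitized name is fixed.
	assert.Equal(t, "template-node-for-bzium-abc", node.Node.Name)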
17 changes: 15 additions & 2 deletions cluster-autoscaler/dynamicresources/sanitize.go
@@ -3,6 +3,7 @@ package dynamicresources
 import (
 	"fmt"
 
+	resourceapi "k8s.io/api/resource/v1alpha3"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -11,22 +12,26 @@ import (
 // - NodeName pointers in all DRA objects are updated to the provided nodeName.
 // - Names of all DRA objects get the provided nameSuffix appended.
 // - UIDs of all DRA objects are randomized.
+// - Names of all Pools that the slices belong to get the provided nameSuffix appended.
 //
 // This needs to be done anytime we want to add a "copy" of some Node (and so also of its NodeDynamicResources)
 // to ClusterSnapshot.
 func SanitizedNodeDynamicResources(ndr schedulerframework.NodeDynamicResources, nodeName, nameSuffix string) schedulerframework.NodeDynamicResources {
 	sanitizedNdr := ndr.DeepCopy()
 	for _, slice := range sanitizedNdr.ResourceSlices {
 		slice.Name = fmt.Sprintf("%s-%s", slice.Name, nameSuffix)
+		slice.Spec.Pool.Name = fmt.Sprintf("%s-%s", slice.Spec.Pool.Name, nameSuffix)
 		slice.UID = uuid.NewUUID()
 		slice.Spec.NodeName = nodeName
 	}
 	return sanitizedNdr
 }
 
 // SanitizedPodDynamicResourceRequests returns a deep copy of the provided PodDynamicResourceRequests where:
-// - Names of all DRA objects get the provided nameSuffix appended.
-// - UIDs of all DRA objects are randomized.
+//   - Names of all DRA objects get the provided nameSuffix appended.
+//   - UIDs of all DRA objects are randomized.
+//   - Names of all Pools in the allocation results get the provided nameSuffix appended. The suffix should match
+//     the suffix used while sanitizing Pools in ResourceSlices of its Node.
 //
 // This needs to be done anytime we want to add a "copy" of some Pod (and so also of its PodDynamicResourceRequests)
 // to ClusterSnapshot.
@@ -35,6 +40,14 @@ func SanitizedPodDynamicResourceRequests(pdr schedulerframework.PodDynamicResour
 	for _, claim := range sanitizedPdr.ResourceClaims {
 		claim.Name = fmt.Sprintf("%s-%s", claim.Name, nameSuffix)
 		claim.UID = uuid.NewUUID()
+
+		var sanitizedAllocations []resourceapi.DeviceRequestAllocationResult
+		for _, devAlloc := range claim.Status.Allocation.Devices.Results {
+			devAlloc.Pool = fmt.Sprintf("%s-%s", devAlloc.Pool, nameSuffix)
+			sanitizedAllocations = append(sanitizedAllocations, devAlloc)
+		}
+
+		claim.Status.Allocation.Devices.Results = sanitizedAllocations
 	}
 	return sanitizedPdr
 }
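One subtlety in the pod path above: the ResourceClaims loop mutates claim in place (the claims are evidently pointers), while Devices.Results holds struct values, so assigning through the range variable devAlloc changes only a copy; that is why the new code collects the modified copies and reassigns the slice. A standalone Go illustration of the difference, with hypothetical types and names:

package main

import "fmt"

type allocation struct{ Pool string }

func main() {
	results := []allocation{{Pool: "pool-a"}, {Pool: "pool-b"}}

	// Assigning through the range variable mutates a copy, not the slice element.
	for _, r := range results {
		r.Pool = r.Pool + "-suffix"
	}
	fmt.Println(results) // [{pool-a} {pool-b}], unchanged

	// Collecting the modified copies and reassigning, as the commit does, works.
	var sanitized []allocation
	for _, r := range results {
		r.Pool = r.Pool + "-suffix"
		sanitized = append(sanitized, r)
	}
	results = sanitized
	fmt.Println(results) // [{pool-a-suffix} {pool-b-suffix}]
}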
6 changes: 4 additions & 2 deletions
@@ -104,14 +104,16 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext,
 			return false, "", err
 		}
 
+		randSuffix := fmt.Sprintf("%d", rand.Int63())
+
 		// Deep copy and sanitize a real Node (and its associated DRA objects) into a fake
-		sanitizedNode, err := utils.SanitizeNode(clustersnapshot.NewNodeResourceInfo(node, ctx.ClusterSnapshot.DraObjectsSource), id, taintConfig)
+		sanitizedNode, err := utils.SanitizeNode(clustersnapshot.NewNodeResourceInfo(node, ctx.ClusterSnapshot.DraObjectsSource), id, taintConfig, randSuffix)
 		if err != nil {
 			return false, "", err
 		}
 
 		// Deep copy and sanitize the startup Pods (and their associated DRA objects) into fakes pointing to the fake sanitized Node.
-		sanitizedStartupPods := utils.SanitizePods(startupPods, sanitizedNode.Node.Name, fmt.Sprintf("%d", rand.Int63()))
+		sanitizedStartupPods := utils.SanitizePods(startupPods, sanitizedNode.Node.Name, randSuffix)
 
 		// Build the final node info with all 3 parts (Node, Pods, DRA objects) sanitized and in sync.
 		result[id] = clustersnapshot.NewNodeInfo(sanitizedNode, sanitizedStartupPods)
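Taken together with GetNodeInfoFromTemplate above, both call sites now preserve the invariant spelled out in the sanitize.go doc comments: the Pool names in a pod claim's allocation results must carry the same suffix as the Pools in the node's ResourceSlices, so allocated devices can still be resolved against the sanitized node. A trivial sketch of that invariant, with hypothetical names:

	suffix := "12345"
	slicePool := fmt.Sprintf("%s-%s", "gpu-pool", suffix) // sanitized via SanitizedNodeDynamicResources
	claimPool := fmt.Sprintf("%s-%s", "gpu-pool", suffix) // sanitized via SanitizedPodDynamicResourceRequests
	fmt.Println(slicePool == claimPool)                   // true: allocations still point at the node's pools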
