From b5df1d7ed096247260380da1fb75524fcc1bb613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Tu=C5=BCnik?= Date: Wed, 20 Nov 2024 18:20:36 +0100 Subject: [PATCH] TMP --- .../dynamicresources/resource_claim_utils.go | 19 +-- .../dynamicresources/snapshot.go | 37 ++++++ .../simulator/clustersnapshot/base/basic.go | 15 ++- .../simulator/clustersnapshot/base/delta.go | 5 + .../clustersnapshot/clustersnapshot.go | 16 +++ .../predicate/plugin_runner.go | 30 +++-- .../predicate/predicate_snapshot.go | 112 +++++++++++++++++- .../simulator/framework/infos.go | 2 +- 8 files changed, 210 insertions(+), 26 deletions(-) diff --git a/cluster-autoscaler/dynamicresources/resource_claim_utils.go b/cluster-autoscaler/dynamicresources/resource_claim_utils.go index 2b56a72c2ae4..75a7b93e1520 100644 --- a/cluster-autoscaler/dynamicresources/resource_claim_utils.go +++ b/cluster-autoscaler/dynamicresources/resource_claim_utils.go @@ -18,6 +18,7 @@ package dynamicresources import ( "fmt" + "k8s.io/component-helpers/scheduling/corev1" apiv1 "k8s.io/api/core/v1" resourceapi "k8s.io/api/resource/v1beta1" @@ -52,14 +53,18 @@ func ClaimInUse(claim *resourceapi.ResourceClaim) bool { return len(claim.Status.ReservedFor) > 0 } -// ClaimReservedForPod returns whether the provided claim is currently reserved for the provided Pod. -func ClaimReservedForPod(claim *resourceapi.ResourceClaim, pod *apiv1.Pod) bool { - for _, consumerRef := range claim.Status.ReservedFor { - if claimConsumerReferenceMatchesPod(pod, consumerRef) { - return true - } +// ClaimAvailableOnNode returns whether the provided claim is allocated and available on the provided Node. +func ClaimAvailableOnNode(claim *resourceapi.ResourceClaim, node *apiv1.Node) (bool, error) { + if !ClaimAllocated(claim) { + // Not allocated so not available anywhere. + return false, nil + } + selector := claim.Status.Allocation.NodeSelector + if selector == nil { + // nil means available everywhere. + return true, nil } - return false + return corev1.MatchNodeSelectorTerms(node, claim.Status.Allocation.NodeSelector) } // DeallocateClaimInPlace clears the allocation of the provided claim. diff --git a/cluster-autoscaler/dynamicresources/snapshot.go b/cluster-autoscaler/dynamicresources/snapshot.go index 3604e2165a94..9bb2b8d638b7 100644 --- a/cluster-autoscaler/dynamicresources/snapshot.go +++ b/cluster-autoscaler/dynamicresources/snapshot.go @@ -18,6 +18,7 @@ package dynamicresources import ( "fmt" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" apiv1 "k8s.io/api/core/v1" resourceapi "k8s.io/api/resource/v1beta1" @@ -57,6 +58,42 @@ func (s Snapshot) DeviceClasses() schedulerframework.DeviceClassLister { return snapshotClassLister(s) } +// WrapSchedulerNodeInfo wraps the provided *schedulerframework.NodeInfo into an internal *framework.NodeInfo, adding +// dra information. Node-local ResourceSlices are added to the NodeInfo, and all ResourceClaims referenced by each Pod +// are added to each PodInfo. Returns an error if any of the Pods is missing a ResourceClaim. 
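+//
+// Rough usage sketch (hypothetical caller; the Snapshot is assumed to already track the Node's
+// ResourceSlices and the Pods' ResourceClaims):
+//
+//	nodeInfo, err := draSnapshot.WrapSchedulerNodeInfo(schedNodeInfo)
+//	if err != nil {
+//		return err // e.g. a Pod references a ResourceClaim that isn't tracked in the Snapshot
+//	}
+//	_ = nodeInfo.LocalResourceSlices // node-local slices taken from the Snapshot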
+func (s Snapshot) WrapSchedulerNodeInfo(schedNodeInfo *schedulerframework.NodeInfo) (*framework.NodeInfo, error) { + var pods []*framework.PodInfo + for _, podInfo := range schedNodeInfo.Pods { + podClaims, err := s.PodClaims(podInfo.Pod) + if err != nil { + return nil, err + } + pods = append(pods, &framework.PodInfo{ + Pod: podInfo.Pod, + NeededResourceClaims: podClaims, + }) + } + nodeSlices, _ := s.NodeResourceSlices(schedNodeInfo.Node()) + return &framework.NodeInfo{ + NodeInfo: schedNodeInfo, + LocalResourceSlices: nodeSlices, + Pods: pods, + }, nil +} + +// WrapSchedulerNodeInfos calls WrapSchedulerNodeInfo() on a list of *schedulerframework.NodeInfos. +func (s Snapshot) WrapSchedulerNodeInfos(schedNodeInfos []*schedulerframework.NodeInfo) ([]*framework.NodeInfo, error) { + var result []*framework.NodeInfo + for _, schedNodeInfo := range schedNodeInfos { + nodeInfo, err := s.WrapSchedulerNodeInfo(schedNodeInfo) + if err != nil { + return nil, err + } + result = append(result, nodeInfo) + } + return result, nil +} + // Clone returns a copy of this Snapshot that can be independently modified without affecting this Snapshot. // The only mutable objects in the Snapshot are ResourceClaims, so they are deep-copied. The rest is only a // shallow copy. diff --git a/cluster-autoscaler/simulator/clustersnapshot/base/basic.go b/cluster-autoscaler/simulator/clustersnapshot/base/basic.go index b35a1595f183..610918600d10 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/base/basic.go +++ b/cluster-autoscaler/simulator/clustersnapshot/base/basic.go @@ -18,6 +18,7 @@ package base import ( "fmt" + "k8s.io/autoscaler/cluster-autoscaler/dynamicresources" apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" @@ -35,6 +36,7 @@ type BasicSnapshotBase struct { type internalBasicSnapshotData struct { nodeInfoMap map[string]*schedulerframework.NodeInfo pvcNamespacePodMap map[string]map[string]bool + draSnapshot dynamicresources.Snapshot } func (data *internalBasicSnapshotData) listNodeInfos() []*schedulerframework.NodeInfo { @@ -123,6 +125,7 @@ func newInternalBasicSnapshotData() *internalBasicSnapshotData { return &internalBasicSnapshotData{ nodeInfoMap: make(map[string]*schedulerframework.NodeInfo), pvcNamespacePodMap: make(map[string]map[string]bool), + draSnapshot: dynamicresources.Snapshot{}, } } @@ -141,6 +144,7 @@ func (data *internalBasicSnapshotData) clone() *internalBasicSnapshotData { return &internalBasicSnapshotData{ nodeInfoMap: clonedNodeInfoMap, pvcNamespacePodMap: clonedPvcNamespaceNodeMap, + draSnapshot: data.draSnapshot.Clone(), } } @@ -205,19 +209,24 @@ func (snapshot *BasicSnapshotBase) getInternalData() *internalBasicSnapshotData return snapshot.data[len(snapshot.data)-1] } +// DraSnapshot returns the DRA snapshot. +func (snapshot *BasicSnapshotBase) DraSnapshot() clustersnapshot.DraSnapshotMutator { + return snapshot.getInternalData().draSnapshot +} + // GetNodeInfo gets a NodeInfo. func (snapshot *BasicSnapshotBase) GetNodeInfo(nodeName string) (*framework.NodeInfo, error) { schedNodeInfo, err := snapshot.getInternalData().getNodeInfo(nodeName) if err != nil { return nil, err } - return framework.WrapSchedulerNodeInfo(schedNodeInfo), nil + return snapshot.getInternalData().draSnapshot.WrapSchedulerNodeInfo(schedNodeInfo) } // ListNodeInfos lists NodeInfos. 
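+// The returned NodeInfos are wrapped with DRA information from the DRA snapshot: node-local
+// ResourceSlices on each NodeInfo and the referenced ResourceClaims on each PodInfo.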
func (snapshot *BasicSnapshotBase) ListNodeInfos() ([]*framework.NodeInfo, error) { schedNodeInfos := snapshot.getInternalData().listNodeInfos() - return framework.WrapSchedulerNodeInfos(schedNodeInfos), nil + return snapshot.getInternalData().draSnapshot.WrapSchedulerNodeInfos(schedNodeInfos) } // AddNodeInfo adds a NodeInfo. @@ -321,7 +330,7 @@ func (snapshot *BasicSnapshotBase) StorageInfos() schedulerframework.StorageInfo // SharedDRAManager exposes snapshot as SharedDRAManager. func (snapshot *BasicSnapshotBase) SharedDRAManager() schedulerframework.SharedDRAManager { - return nil + return snapshot.getInternalData().draSnapshot } // List returns the list of nodes in the snapshot. diff --git a/cluster-autoscaler/simulator/clustersnapshot/base/delta.go b/cluster-autoscaler/simulator/clustersnapshot/base/delta.go index 1090d68dcd38..b1cb63b784cf 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/base/delta.go +++ b/cluster-autoscaler/simulator/clustersnapshot/base/delta.go @@ -399,6 +399,11 @@ func NewDeltaSnapshotBase() *DeltaSnapshotBase { return snapshot } +// DraSnapshot returns the DRA snapshot. +func (snapshot *DeltaSnapshotBase) DraSnapshot() clustersnapshot.DraSnapshotMutator { + return nil +} + // GetNodeInfo gets a NodeInfo. func (snapshot *DeltaSnapshotBase) GetNodeInfo(nodeName string) (*framework.NodeInfo, error) { schedNodeInfo, err := snapshot.getNodeInfo(nodeName) diff --git a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go b/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go index a3d8af16f5cc..1b3eec56d326 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/clustersnapshot.go @@ -19,6 +19,7 @@ package clustersnapshot import ( "errors" apiv1 "k8s.io/api/core/v1" + resourceapi "k8s.io/api/resource/v1beta1" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/klog/v2" ) @@ -77,6 +78,9 @@ type SnapshotBase interface { // ListNodeInfos returns internal NodeInfos for all Nodes tracked in the snapshot. See the comment on GetNodeInfo. ListNodeInfos() ([]*framework.NodeInfo, error) + // DraSnapshot returns an interface that allows accessing and modifying the DRA objects in the snapshot. + DraSnapshot() DraSnapshotMutator + // Fork creates a fork of snapshot state. All modifications can later be reverted to moment of forking via Revert(). // Use WithForkedSnapshot() helper function instead if possible. Fork() @@ -86,6 +90,18 @@ type SnapshotBase interface { Commit() error } +// DraSnapshotMutator allows accessing and modifying DRA objects in a snapshot. +type DraSnapshotMutator interface { + PodClaims(pod *apiv1.Pod) ([]*resourceapi.ResourceClaim, error) + RemovePodClaims(pod *apiv1.Pod) + ReservePodClaims(pod *apiv1.Pod) error + UnreservePodClaims(pod *apiv1.Pod) error + + NodeResourceSlices(node *apiv1.Node) ([]*resourceapi.ResourceSlice, bool) + AddNodeResourceSlices(nodeName string, slices []*resourceapi.ResourceSlice) error + RemoveNodeResourceSlices(nodeName string) +} + // ErrNodeNotFound means that a node wasn't found in the snapshot. 
var ErrNodeNotFound = errors.New("node not found") diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go index 82b49b54670d..07de31c7fb11 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/plugin_runner.go @@ -44,10 +44,10 @@ func NewSchedulerPluginRunner(fwHandle *framework.Handle, snapshotBase clustersn // function - until a Node where the filters pass is found. Filters are only run for matching Nodes. If no matching node with passing filters is found, an error is returned. // // The node iteration always starts from the next Node from the last Node that was found by this method. TODO: Extract the iteration strategy out of SchedulerPluginRunner. -func (p *SchedulerPluginRunner) RunFiltersUntilPassingNode(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) { +func (p *SchedulerPluginRunner) RunFiltersUntilPassingNode(pod *apiv1.Pod, nodeMatches func(*framework.NodeInfo) bool) (*apiv1.Node, *schedulerframework.CycleState, clustersnapshot.SchedulingError) { nodeInfosList, err := p.snapshotBase.ListNodeInfos() if err != nil { - return "", clustersnapshot.NewSchedulingInternalError(pod, "ClusterSnapshot not provided") + return nil, nil, clustersnapshot.NewSchedulingInternalError(pod, "ClusterSnapshot not provided") } p.fwHandle.DelegatingLister.UpdateDelegate(p.snapshotBase) @@ -56,7 +56,7 @@ func (p *SchedulerPluginRunner) RunFiltersUntilPassingNode(pod *apiv1.Pod, nodeM state := schedulerframework.NewCycleState() preFilterResult, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) if !preFilterStatus.IsSuccess() { - return "", clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") + return nil, nil, clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") } for i := range nodeInfosList { @@ -77,17 +77,17 @@ func (p *SchedulerPluginRunner) RunFiltersUntilPassingNode(pod *apiv1.Pod, nodeM filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) if filterStatus.IsSuccess() { p.lastIndex = (p.lastIndex + i + 1) % len(nodeInfosList) - return nodeInfo.Node().Name, nil + return nodeInfo.Node(), state, nil } } - return "", clustersnapshot.NewNoNodesPassingPredicatesFoundError(pod) + return nil, nil, clustersnapshot.NewNoNodesPassingPredicatesFoundError(pod) } // RunFiltersOnNode runs the scheduler framework PreFilter and Filter phases to check if the given pod can be scheduled on the given node. 
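+// On success it returns the Node and the CycleState from the PreFilter/Filter run; callers need the
+// CycleState to run the Reserve phase for the Pod afterwards via RunReserveOnNode.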
-func (p *SchedulerPluginRunner) RunFiltersOnNode(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { +func (p *SchedulerPluginRunner) RunFiltersOnNode(pod *apiv1.Pod, nodeName string) (*apiv1.Node, *schedulerframework.CycleState, clustersnapshot.SchedulingError) { nodeInfo, err := p.snapshotBase.GetNodeInfo(nodeName) if err != nil { - return clustersnapshot.NewSchedulingInternalError(pod, fmt.Sprintf("error obtaining NodeInfo for name %q: %v", nodeName, err)) + return nil, nil, clustersnapshot.NewSchedulingInternalError(pod, fmt.Sprintf("error obtaining NodeInfo for name %q: %v", nodeName, err)) } p.fwHandle.DelegatingLister.UpdateDelegate(p.snapshotBase) @@ -96,7 +96,7 @@ func (p *SchedulerPluginRunner) RunFiltersOnNode(pod *apiv1.Pod, nodeName string state := schedulerframework.NewCycleState() _, preFilterStatus, _ := p.fwHandle.Framework.RunPreFilterPlugins(context.TODO(), state, pod) if !preFilterStatus.IsSuccess() { - return clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") + return nil, nil, clustersnapshot.NewFailingPredicateError(pod, preFilterStatus.Plugin(), preFilterStatus.Reasons(), "PreFilter failed", "") } filterStatus := p.fwHandle.Framework.RunFilterPlugins(context.TODO(), state, pod, nodeInfo.ToScheduler()) @@ -108,9 +108,21 @@ func (p *SchedulerPluginRunner) RunFiltersOnNode(pod *apiv1.Pod, nodeName string if !filterStatus.IsRejected() { unexpectedErrMsg = fmt.Sprintf("unexpected filter status %q", filterStatus.Code().String()) } - return clustersnapshot.NewFailingPredicateError(pod, filterName, filterReasons, unexpectedErrMsg, p.failingFilterDebugInfo(filterName, nodeInfo)) + return nil, nil, clustersnapshot.NewFailingPredicateError(pod, filterName, filterReasons, unexpectedErrMsg, p.failingFilterDebugInfo(filterName, nodeInfo)) } + return nodeInfo.Node(), state, nil +} + +// RunReserveOnNode runs the scheduler framework Reserve phase to update the scheduler plugins state to reflect the Pod being scheduled on the Node. +func (p *SchedulerPluginRunner) RunReserveOnNode(pod *apiv1.Pod, nodeName string, postFilterState *schedulerframework.CycleState) error { + p.fwHandle.DelegatingLister.UpdateDelegate(p.snapshotBase) + defer p.fwHandle.DelegatingLister.ResetDelegate() + + status := p.fwHandle.Framework.RunReservePluginsReserve(context.Background(), postFilterState, pod, nodeName) + if !status.IsSuccess() { + return fmt.Errorf("couldn't reserve node %s for pod %s/%s: %v", nodeName, pod.Namespace, pod.Name, status.Message()) + } return nil } diff --git a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go index 3e1156b58dae..5f2cfff51abd 100644 --- a/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go +++ b/cluster-autoscaler/simulator/clustersnapshot/predicate/predicate_snapshot.go @@ -17,9 +17,12 @@ limitations under the License. 
package predicate import ( + "fmt" apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/dynamicresources" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) // PredicateSnapshot implements ClusterSnapshot on top of a SnapshotBase by using @@ -27,21 +30,59 @@ import ( type PredicateSnapshot struct { clustersnapshot.SnapshotBase pluginRunner *SchedulerPluginRunner + draEnabled bool } // NewPredicateSnapshot builds a PredicateSnapshot. -func NewPredicateSnapshot(snapshotBase clustersnapshot.SnapshotBase, fwHandle *framework.Handle) *PredicateSnapshot { +func NewPredicateSnapshot(snapshotBase clustersnapshot.SnapshotBase, fwHandle *framework.Handle, draEnabled bool) *PredicateSnapshot { return &PredicateSnapshot{ SnapshotBase: snapshotBase, pluginRunner: NewSchedulerPluginRunner(fwHandle, snapshotBase), + draEnabled: draEnabled, } } +func (s *PredicateSnapshot) AddNodeInfo(nodeInfo *framework.NodeInfo) error { + if s.draEnabled { + // We need to add extra ResourceSlices to the DRA snapshot. The DRA snapshot should contain all real slices after SetClusterState(), + // so these should be fake node-local slices for a fake duplicated Node. + err := s.SnapshotBase.DraSnapshot().AddNodeResourceSlices(nodeInfo.Node().Name, nodeInfo.LocalResourceSlices) + if err != nil { + return fmt.Errorf("couldn't add ResourceSlices to DRA snapshot: %v", err) + } + } + + return s.SnapshotBase.AddNodeInfo(nodeInfo) +} + +func (s *PredicateSnapshot) RemoveNodeInfo(nodeName string) error { + if s.draEnabled { + nodeInfo, err := s.SnapshotBase.GetNodeInfo(nodeName) + if err != nil { + return err + } + s.SnapshotBase.DraSnapshot().RemoveNodeResourceSlices(nodeName) + for _, pod := range nodeInfo.Pods() { + s.SnapshotBase.DraSnapshot().RemovePodClaims(pod.Pod) + } + } + + return s.SnapshotBase.RemoveNodeInfo(nodeName) +} + // SchedulePod adds pod to the snapshot and schedules it to given node. func (s *PredicateSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError { - if schedErr := s.pluginRunner.RunFiltersOnNode(pod, nodeName); schedErr != nil { + node, cycleState, schedErr := s.pluginRunner.RunFiltersOnNode(pod, nodeName) + if schedErr != nil { return schedErr } + + if s.draEnabled { + if err := s.handleResourceClaimModifications(pod, node, cycleState); err != nil { + return clustersnapshot.NewSchedulingInternalError(pod, err.Error()) + } + } + if err := s.ForceAddPod(pod, nodeName); err != nil { return clustersnapshot.NewSchedulingInternalError(pod, err.Error()) } @@ -50,22 +91,81 @@ func (s *PredicateSnapshot) SchedulePod(pod *apiv1.Pod, nodeName string) cluster // SchedulePodOnAnyNodeMatching adds pod to the snapshot and schedules it to any node matching the provided function. 
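+// With DRA enabled, the Pod's ResourceClaims are allocated and reserved in the DRA snapshot
+// (via the scheduler Reserve phase) before the Pod is added to the chosen Node.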
 func (s *PredicateSnapshot) SchedulePodOnAnyNodeMatching(pod *apiv1.Pod, anyNodeMatching func(*framework.NodeInfo) bool) (string, clustersnapshot.SchedulingError) {
-	nodeName, schedErr := s.pluginRunner.RunFiltersUntilPassingNode(pod, anyNodeMatching)
+	node, cycleState, schedErr := s.pluginRunner.RunFiltersUntilPassingNode(pod, anyNodeMatching)
 	if schedErr != nil {
 		return "", schedErr
 	}
-	if err := s.ForceAddPod(pod, nodeName); err != nil {
+
+	if s.draEnabled {
+		if err := s.handleResourceClaimModifications(pod, node, cycleState); err != nil {
+			return "", clustersnapshot.NewSchedulingInternalError(pod, err.Error())
+		}
+	}
+
+	if err := s.ForceAddPod(pod, node.Name); err != nil {
 		return "", clustersnapshot.NewSchedulingInternalError(pod, err.Error())
 	}
-	return nodeName, nil
+	return node.Name, nil
 }
 
 // UnschedulePod removes the given Pod from the given Node inside the snapshot.
 func (s *PredicateSnapshot) UnschedulePod(namespace string, podName string, nodeName string) error {
+	if s.draEnabled {
+		nodeInfo, err := s.SnapshotBase.GetNodeInfo(nodeName)
+		if err != nil {
+			return err
+		}
+		var foundPod *apiv1.Pod
+		for _, pod := range nodeInfo.Pods() {
+			if pod.Namespace == namespace && pod.Name == podName {
+				foundPod = pod.Pod
+				break
+			}
+		}
+		if foundPod == nil {
+			return fmt.Errorf("pod %s/%s not found on node %s", namespace, podName, nodeName)
+		}
+		if err := s.SnapshotBase.DraSnapshot().UnreservePodClaims(foundPod); err != nil {
+			return err
+		}
+	}
 	return s.ForceRemovePod(namespace, podName, nodeName)
 }
 
 // CheckPredicates checks whether scheduler predicates pass for the given pod on the given node.
 func (s *PredicateSnapshot) CheckPredicates(pod *apiv1.Pod, nodeName string) clustersnapshot.SchedulingError {
-	return s.pluginRunner.RunFiltersOnNode(pod, nodeName)
+	_, _, err := s.pluginRunner.RunFiltersOnNode(pod, nodeName)
+	return err
+}
+
+func (s *PredicateSnapshot) handleResourceClaimModifications(pod *apiv1.Pod, node *apiv1.Node, postFilterState *schedulerframework.CycleState) error {
+	if len(pod.Spec.ResourceClaims) == 0 {
+		return nil
+	}
+	// We need to run the scheduler Reserve phase to allocate the appropriate ResourceClaims in the DRA snapshot. The allocations are
+	// actually computed and cached in the Filter phase, and Reserve only grabs them from the cycle state. So this should be quick, but
+	// it needs the cycle state from after running the Filter phase.
+	err := s.pluginRunner.RunReserveOnNode(pod, node.Name, postFilterState)
+	if err != nil {
+		return fmt.Errorf("error while trying to run Reserve on node %s for pod %s/%s: %v", node.Name, pod.Namespace, pod.Name, err)
+	}
+
+	// The pod isn't added to the ReservedFor field of the claim during the Reserve phase (it happens later, in PreBind). We can just do it
+	// manually here. It shouldn't fail, it only fails if ReservedFor is at max length already, but that is checked during the Filter phase.
+	err = s.SnapshotBase.DraSnapshot().ReservePodClaims(pod)
+	if err != nil {
+		return fmt.Errorf("couldn't add pod reservations to claims, this shouldn't happen: %v", err)
+	}
+
+	// Verify that all needed claims are tracked in the DRA snapshot, allocated, and available on the Node.
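+	// The DRA scheduler plugin's Filter phase should have already guaranteed this, so a failure
+	// here most likely means the DRA snapshot has diverged from the cluster snapshot.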
+ claims, err := s.SnapshotBase.DraSnapshot().PodClaims(pod) + if err != nil { + return fmt.Errorf("couldn't obtain pod %s/%s claims: %v", pod.Namespace, pod.Name, err) + } + for _, claim := range claims { + if available, err := dynamicresources.ClaimAvailableOnNode(claim, node); err != nil || !available { + return fmt.Errorf("pod %s/%s needs claim %s to schedule, but it isn't available on node %s (allocated: %v, available: %v, err: %v)", pod.Namespace, pod.Name, claim.Name, node.Name, dynamicresources.ClaimAllocated(claim), available, err) + } + } + return nil } diff --git a/cluster-autoscaler/simulator/framework/infos.go b/cluster-autoscaler/simulator/framework/infos.go index b3f799066cb8..7f4919e9654e 100644 --- a/cluster-autoscaler/simulator/framework/infos.go +++ b/cluster-autoscaler/simulator/framework/infos.go @@ -127,7 +127,7 @@ func NewNodeInfo(node *apiv1.Node, slices []*resourceapi.ResourceSlice, pods ... } // WrapSchedulerNodeInfo wraps a *schedulerframework.NodeInfo into an internal *NodeInfo. -func WrapSchedulerNodeInfo(schedNodeInfo *schedulerframework.NodeInfo) *NodeInfo { +func WrapSchedulerNodeInfo(schedNodeInfo *schedulerframework.NodeInfo, slices []*resourceapi.ResourceSlice) *NodeInfo { return &NodeInfo{ schedNodeInfo: schedNodeInfo, podsExtraInfo: map[types.UID]podExtraInfo{},
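
For reference, a rough end-to-end sketch of how the pieces above are intended to fit together (not part of the diff; the helper name and setup are hypothetical). It assumes a clustersnapshot.SnapshotBase and a framework.Handle are constructed elsewhere, and that the Pod's ResourceClaims are already tracked in the DRA snapshot (normally populated via SetClusterState):

package example // hypothetical package, illustration only

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1beta1"

	"k8s.io/autoscaler/cluster-autoscaler/dynamicresources"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/predicate"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
)

// schedulePodWithDRA (hypothetical helper) adds a Node together with its node-local ResourceSlices,
// schedules a Pod on it, and then double-checks that the Pod's claims ended up allocated and
// available on that Node.
func schedulePodWithDRA(base clustersnapshot.SnapshotBase, fwHandle *framework.Handle,
	pod *apiv1.Pod, node *apiv1.Node, slices []*resourceapi.ResourceSlice) error {
	// draEnabled=true switches on the new DRA handling in PredicateSnapshot.
	snapshot := predicate.NewPredicateSnapshot(base, fwHandle, true)

	// With DRA enabled, AddNodeInfo also records the node-local slices in the DRA snapshot.
	if err := snapshot.AddNodeInfo(framework.NewNodeInfo(node, slices)); err != nil {
		return err
	}

	// Runs Filter, then (for pods with claims) Reserve + claim reservation, then ForceAddPod.
	if schedErr := snapshot.SchedulePod(pod, node.Name); schedErr != nil {
		return fmt.Errorf("scheduling failed: %v", schedErr)
	}

	// Every claim the Pod references should now be allocated and available on the Node.
	claims, err := snapshot.DraSnapshot().PodClaims(pod)
	if err != nil {
		return err
	}
	for _, claim := range claims {
		if ok, err := dynamicresources.ClaimAvailableOnNode(claim, node); err != nil || !ok {
			return fmt.Errorf("claim %s not available on node %s: %v", claim.Name, node.Name, err)
		}
	}
	return nil
}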