Skip to content

Commit

Permalink
[RayJob] RayJob deletion policy validation (#2771)
Browse files (browse the repository at this point in the history)
Signed-off-by: Rueian <[email protected]>
  • Loading branch information
rueian authored Jan 19, 2025
1 parent 7f95a6c commit f191a75
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
16 changes: 16 additions & 0 deletions ray-operator/controllers/ray/rayjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,22 @@ func validateRayJobSpec(rayJob *rayv1.RayJob) error {
if rayJob.Spec.BackoffLimit != nil && *rayJob.Spec.BackoffLimit < 0 {
return fmt.Errorf("backoffLimit must be a positive integer")
}
if !features.Enabled(features.RayJobDeletionPolicy) && rayJob.Spec.DeletionPolicy != nil {
return fmt.Errorf("RayJobDeletionPolicy feature gate must be enabled to use the DeletionPolicy feature")
}
if rayJob.Spec.ClusterSelector != nil &&
rayJob.Spec.DeletionPolicy != nil && *rayJob.Spec.DeletionPolicy == rayv1.DeleteClusterDeletionPolicy {
return fmt.Errorf("the ClusterSelector mode doesn't support DeletionPolicy=DeleteCluster")
}
if rayJob.Spec.ClusterSelector != nil &&
rayJob.Spec.DeletionPolicy != nil && *rayJob.Spec.DeletionPolicy == rayv1.DeleteWorkersDeletionPolicy {
return fmt.Errorf("the ClusterSelector mode doesn't support DeletionPolicy=DeleteWorkers")
}
if rayJob.Spec.DeletionPolicy != nil && *rayJob.Spec.DeletionPolicy == rayv1.DeleteWorkersDeletionPolicy &&
rayJob.Spec.RayClusterSpec.EnableInTreeAutoscaling != nil && *rayJob.Spec.RayClusterSpec.EnableInTreeAutoscaling {
// TODO (rueian): This can be supported in future Ray. We should check the RayVersion once we know the version.
return fmt.Errorf("DeletionPolicy=DeleteWorkers currently does not support RayClusterSpec.EnableInTreeAutoscaling")
}
if rayJob.Spec.ShutdownAfterJobFinishes && rayJob.Spec.DeletionPolicy != nil && *rayJob.Spec.DeletionPolicy == rayv1.DeleteNoneDeletionPolicy {
return fmt.Errorf("shutdownAfterJobFinshes is set to 'true' while deletion policy is 'DeleteNone'")
}
Expand Down
38 changes: 38 additions & 0 deletions ray-operator/controllers/ray/rayjob_controller_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
utils "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
"github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme"
"github.com/ray-project/kuberay/ray-operator/pkg/features"
)

func TestCreateRayJobSubmitterIfNeed(t *testing.T) {
Expand Down Expand Up @@ -367,6 +368,43 @@ func TestValidateRayJobSpec(t *testing.T) {
})
assert.ErrorContains(t, err, "backoffLimit must be a positive integer")

err = validateRayJobSpec(&rayv1.RayJob{
Spec: rayv1.RayJobSpec{
DeletionPolicy: ptr.To(rayv1.DeleteClusterDeletionPolicy),
ShutdownAfterJobFinishes: true,
RayClusterSpec: &rayv1.RayClusterSpec{},
},
})
assert.ErrorContains(t, err, "RayJobDeletionPolicy feature gate must be enabled to use the DeletionPolicy feature")

defer features.SetFeatureGateDuringTest(t, features.RayJobDeletionPolicy, true)()

err = validateRayJobSpec(&rayv1.RayJob{
Spec: rayv1.RayJobSpec{
DeletionPolicy: ptr.To(rayv1.DeleteClusterDeletionPolicy),
ClusterSelector: map[string]string{"key": "value"},
},
})
assert.ErrorContains(t, err, "the ClusterSelector mode doesn't support DeletionPolicy=DeleteCluster")

err = validateRayJobSpec(&rayv1.RayJob{
Spec: rayv1.RayJobSpec{
DeletionPolicy: ptr.To(rayv1.DeleteWorkersDeletionPolicy),
ClusterSelector: map[string]string{"key": "value"},
},
})
assert.ErrorContains(t, err, "the ClusterSelector mode doesn't support DeletionPolicy=DeleteWorkers")

err = validateRayJobSpec(&rayv1.RayJob{
Spec: rayv1.RayJobSpec{
DeletionPolicy: ptr.To(rayv1.DeleteWorkersDeletionPolicy),
RayClusterSpec: &rayv1.RayClusterSpec{
EnableInTreeAutoscaling: ptr.To[bool](true),
},
},
})
assert.ErrorContains(t, err, "DeletionPolicy=DeleteWorkers currently does not support RayClusterSpec.EnableInTreeAutoscaling")

err = validateRayJobSpec(&rayv1.RayJob{
Spec: rayv1.RayJobSpec{
DeletionPolicy: ptr.To(rayv1.DeleteClusterDeletionPolicy),
Expand Down

0 comments on commit f191a75

Please sign in to comment.