-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathwrapped-failing-pytorch-job.yaml
43 lines (43 loc) · 1.21 KB
/
wrapped-failing-pytorch-job.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Sample AppWrapper that wraps a single Kubeflow PyTorchJob whose Worker
# replica is designed to fail: the Master runs a one-epoch MNIST training
# script while the Worker sleeps briefly and then exits with status 1.
apiVersion: workload.codeflare.dev/v1beta2
kind: AppWrapper
metadata:
  name: sample-failing-pytorch-job
  labels:
    # Kueue queue-name label: names the queue this workload is submitted to.
    kueue.x-k8s.io/queue-name: default-queue
spec:
  components:
    # The only wrapped component: a PyTorchJob with one Master and one Worker.
    - template:
        apiVersion: "kubeflow.org/v1"
        kind: PyTorchJob
        metadata:
          name: pytorch-simple
        spec:
          pytorchReplicaSpecs:
            Master:
              replicas: 1
              restartPolicy: Never
              template:
                spec:
                  containers:
                    - name: pytorch
                      image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
                      # Run the MNIST training script for a single epoch.
                      command:
                        - "python3"
                        - "/opt/pytorch-mnist/mnist.py"
                        - "--epochs=1"
                      resources:
                        requests:
                          cpu: 1
            Worker:
              replicas: 1
              restartPolicy: Never
              template:
                spec:
                  containers:
                    - name: pytorch
                      image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v1beta1-fc858d1
                      # Deliberate failure: sleep 10 seconds, then exit 1.
                      # `command` is exec'd directly (no shell), so the script
                      # must be handed to a shell explicitly; as a lone argv
                      # element it would be looked up as a program literally
                      # named "sleep 10; exit 1" and fail at container start.
                      # NOTE(review): assumes the image provides /bin/sh - confirm.
                      command:
                        - "/bin/sh"
                        - "-c"
                        - "sleep 10; exit 1"
                      resources:
                        requests:
                          cpu: 1