@@ -22,92 +22,14 @@ import (
2222
2323 "github.com/google/go-cmp/cmp"
2424 batchv1 "k8s.io/api/batch/v1"
25- corev1 "k8s.io/api/core/v1"
26- "k8s.io/apimachinery/pkg/api/resource"
27- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28- "k8s.io/utils/ptr"
2925 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
30- kueueconstants "sigs.k8s.io/kueue/pkg/controller/constants"
3126
3227 slice "tpu-slice-controller/api/v1beta1"
3328 "tpu-slice-controller/internal/core"
3429 utiltesting "tpu-slice-controller/internal/util/testing"
30+ testingjob "tpu-slice-controller/internal/util/testingjobs/job"
3531)
3632
37- type JobWrapper struct {
38- batchv1.Job
39- }
40-
41- func MakeJob (name , ns string ) * JobWrapper {
42- return & JobWrapper {
43- Job : batchv1.Job {
44- ObjectMeta : metav1.ObjectMeta {
45- Name : name ,
46- Namespace : ns ,
47- },
48- Spec : batchv1.JobSpec {
49- Template : corev1.PodTemplateSpec {
50- Spec : corev1.PodSpec {
51- RestartPolicy : corev1 .RestartPolicyNever ,
52- Containers : []corev1.Container {
53- {
54- Name : "c" ,
55- Image : "pause" ,
56- },
57- },
58- },
59- },
60- },
61- },
62- }
63- }
64-
65- func (j * JobWrapper ) Obj () * batchv1.Job {
66- return & j .Job
67- }
68-
69- func (j * JobWrapper ) Queue (q string ) * JobWrapper {
70- if j .Labels == nil {
71- j .Labels = make (map [string ]string )
72- }
73- j .Labels [kueueconstants .QueueLabel ] = q
74- return j
75- }
76-
77- func (j * JobWrapper ) Parallelism (p int32 ) * JobWrapper {
78- j .Spec .Parallelism = ptr .To (p )
79- return j
80- }
81-
82- func (j * JobWrapper ) PodAnnotation (k , v string ) * JobWrapper {
83- if j .Spec .Template .Annotations == nil {
84- j .Spec .Template .Annotations = make (map [string ]string )
85- }
86- j .Spec .Template .Annotations [k ] = v
87- return j
88- }
89-
90- func (j * JobWrapper ) NodeSelector (k , v string ) * JobWrapper {
91- if j .Spec .Template .Spec .NodeSelector == nil {
92- j .Spec .Template .Spec .NodeSelector = make (map [string ]string )
93- }
94- j .Spec .Template .Spec .NodeSelector [k ] = v
95- return j
96- }
97-
98- func (j * JobWrapper ) Request (r corev1.ResourceName , v string ) * JobWrapper {
99- if j .Spec .Template .Spec .Containers [0 ].Resources .Limits == nil {
100- j .Spec .Template .Spec .Containers [0 ].Resources .Limits = make (corev1.ResourceList )
101- }
102- j .Spec .Template .Spec .Containers [0 ].Resources .Limits [r ] = resource .MustParse (v )
103- return j
104- }
105-
106- func (j * JobWrapper ) NodeAffinity (key string , values []string ) * JobWrapper {
107- core .AddNodeAffinity (& j .Spec .Template , key , corev1 .NodeSelectorOpIn , values )
108- return j
109- }
110-
11133func TestJobDefault (t * testing.T ) {
11234 const (
11335 baseJobName = "job"
@@ -120,71 +42,71 @@ func TestJobDefault(t *testing.T) {
12042 wantErr error
12143 }{
12244 "no queue label" : {
123- job : MakeJob (baseJobName , baseNamespace ).
45+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
12446 Parallelism (48 ).
12547 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
12648 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
12749 Obj (),
128- wantJob : MakeJob (baseJobName , baseNamespace ).
50+ wantJob : testingjob . MakeJob (baseJobName , baseNamespace ).
12951 Parallelism (48 ).
13052 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
13153 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
13254 Obj (),
13355 },
13456 "no tpu topology annotation" : {
135- job : MakeJob (baseJobName , baseNamespace ).
57+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
13658 Queue ("queue-name" ).
13759 Parallelism (48 ).
13860 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
13961 Obj (),
140- wantJob : MakeJob (baseJobName , baseNamespace ).
62+ wantJob : testingjob . MakeJob (baseJobName , baseNamespace ).
14163 Queue ("queue-name" ).
14264 Parallelism (48 ).
14365 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
14466 Obj (),
14567 },
14668 "no tpu accelerator node selector label" : {
147- job : MakeJob (baseJobName , baseNamespace ).
69+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
14870 Queue ("queue-name" ).
14971 Parallelism (48 ).
15072 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
15173 Obj (),
152- wantJob : MakeJob (baseJobName , baseNamespace ).
74+ wantJob : testingjob . MakeJob (baseJobName , baseNamespace ).
15375 Queue ("queue-name" ).
15476 Parallelism (48 ).
15577 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
15678 Obj (),
15779 },
15880 "should set default values" : {
159- job : MakeJob (baseJobName , baseNamespace ).
81+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
16082 Queue ("queue-name" ).
16183 Parallelism (48 ).
16284 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
16385 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
164- Request (core .TPUResourceName , "4" ).
86+ Limit (core .TPUResourceName , "4" ).
16587 Obj (),
166- wantJob : MakeJob (baseJobName , baseNamespace ).
88+ wantJob : testingjob . MakeJob (baseJobName , baseNamespace ).
16789 Queue ("queue-name" ).
16890 Parallelism (48 ).
16991 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
17092 PodAnnotation (kueue .PodSetRequiredTopologyAnnotation , core .TPUBlockLabel ).
17193 PodAnnotation (kueue .PodSetSliceRequiredTopologyAnnotation , core .TPUSubBlockLabel ).
17294 PodAnnotation (kueue .PodSetSliceSizeAnnotation , "16" ).
17395 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
174- Request (core .TPUResourceName , "4" ).
96+ Limit (core .TPUResourceName , "4" ).
17597 NodeAffinity (core .TPUSliceHealthNodeSelectorKey , []string {core .TPUSliceHealthNodeSelectorHealthy }).
17698 Obj (),
17799 },
178100 "should respect existing NodeSelector for health" : {
179- job : MakeJob (baseJobName , baseNamespace ).
101+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
180102 Queue ("queue-name" ).
181103 Parallelism (48 ).
182104 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
183105 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
184106 NodeSelector (core .TPUSliceHealthNodeSelectorKey , "HEALTHY" ).
185- Request (core .TPUResourceName , "4" ).
107+ Limit (core .TPUResourceName , "4" ).
186108 Obj (),
187- wantJob : MakeJob (baseJobName , baseNamespace ).
109+ wantJob : testingjob . MakeJob (baseJobName , baseNamespace ).
188110 Queue ("queue-name" ).
189111 Parallelism (48 ).
190112 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
@@ -193,19 +115,19 @@ func TestJobDefault(t *testing.T) {
193115 PodAnnotation (kueue .PodSetSliceSizeAnnotation , "16" ).
194116 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
195117 NodeSelector (core .TPUSliceHealthNodeSelectorKey , "HEALTHY" ).
196- Request (core .TPUResourceName , "4" ).
118+ Limit (core .TPUResourceName , "4" ).
197119 Obj (),
198120 },
199121 "should respect existing NodeAffinity for health" : {
200- job : MakeJob (baseJobName , baseNamespace ).
122+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
201123 Queue ("queue-name" ).
202124 Parallelism (48 ).
203125 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
204126 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
205127 NodeAffinity (core .TPUSliceHealthNodeSelectorKey , []string {"HEALTHY" , "DEGRADED" }).
206- Request (core .TPUResourceName , "4" ).
128+ Limit (core .TPUResourceName , "4" ).
207129 Obj (),
208- wantJob : MakeJob (baseJobName , baseNamespace ).
130+ wantJob : testingjob . MakeJob (baseJobName , baseNamespace ).
209131 Queue ("queue-name" ).
210132 Parallelism (48 ).
211133 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x12" ).
@@ -214,16 +136,16 @@ func TestJobDefault(t *testing.T) {
214136 PodAnnotation (kueue .PodSetSliceSizeAnnotation , "16" ).
215137 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
216138 NodeAffinity (core .TPUSliceHealthNodeSelectorKey , []string {"HEALTHY" , "DEGRADED" }).
217- Request (core .TPUResourceName , "4" ).
139+ Limit (core .TPUResourceName , "4" ).
218140 Obj (),
219141 },
220142 "should reject incorrectly configured job not utilizing entire cube; single cube" : {
221- job : MakeJob (baseJobName , baseNamespace ).
143+ job : testingjob . MakeJob (baseJobName , baseNamespace ).
222144 Queue ("queue-name" ).
223145 Parallelism (16 ).
224146 PodAnnotation (core .TPUSliceTopologyAnnotation , "4x4x4" ).
225147 NodeSelector ("cloud.google.com/gke-tpu-accelerator" , string (slice .TypeTpu7x )).
226- Request (core .TPUResourceName , "1" ).
148+ Limit (core .TPUResourceName , "1" ).
227149 Obj (),
228150 wantErr : errors .New ("invalid job \" job\" : configuration results in 16 TPUs requested per cube, but must be exactly 64 TPUs (full utilization)" ),
229151 },
0 commit comments