Skip to content

Commit 8414c94

Browse files
authored
[slice] Remove duplicated test code (#1103)
* Remove duplicated test code * Remove reduntand code
1 parent 6afde56 commit 8414c94

3 files changed

Lines changed: 42 additions & 131 deletions

File tree

slice/internal/util/testingjobs/job/wrappers.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2424
"k8s.io/utils/ptr"
2525
kueueconstants "sigs.k8s.io/kueue/pkg/controller/constants"
26+
27+
"tpu-slice-controller/internal/core"
2628
)
2729

2830
// JobWrapper wraps a Job.
@@ -125,3 +127,8 @@ func (j *JobWrapper) Args(args ...string) *JobWrapper {
125127
j.Spec.Template.Spec.Containers[0].Args = args
126128
return j
127129
}
130+
131+
func (j *JobWrapper) NodeAffinity(key string, values []string) *JobWrapper {
132+
core.AddNodeAffinity(&j.Spec.Template, key, corev1.NodeSelectorOpIn, values)
133+
return j
134+
}

slice/internal/webhooks/job_webhook_test.go

Lines changed: 21 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -22,92 +22,14 @@ import (
2222

2323
"github.com/google/go-cmp/cmp"
2424
batchv1 "k8s.io/api/batch/v1"
25-
corev1 "k8s.io/api/core/v1"
26-
"k8s.io/apimachinery/pkg/api/resource"
27-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28-
"k8s.io/utils/ptr"
2925
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
30-
kueueconstants "sigs.k8s.io/kueue/pkg/controller/constants"
3126

3227
slice "tpu-slice-controller/api/v1beta1"
3328
"tpu-slice-controller/internal/core"
3429
utiltesting "tpu-slice-controller/internal/util/testing"
30+
testingjob "tpu-slice-controller/internal/util/testingjobs/job"
3531
)
3632

37-
type JobWrapper struct {
38-
batchv1.Job
39-
}
40-
41-
func MakeJob(name, ns string) *JobWrapper {
42-
return &JobWrapper{
43-
Job: batchv1.Job{
44-
ObjectMeta: metav1.ObjectMeta{
45-
Name: name,
46-
Namespace: ns,
47-
},
48-
Spec: batchv1.JobSpec{
49-
Template: corev1.PodTemplateSpec{
50-
Spec: corev1.PodSpec{
51-
RestartPolicy: corev1.RestartPolicyNever,
52-
Containers: []corev1.Container{
53-
{
54-
Name: "c",
55-
Image: "pause",
56-
},
57-
},
58-
},
59-
},
60-
},
61-
},
62-
}
63-
}
64-
65-
func (j *JobWrapper) Obj() *batchv1.Job {
66-
return &j.Job
67-
}
68-
69-
func (j *JobWrapper) Queue(q string) *JobWrapper {
70-
if j.Labels == nil {
71-
j.Labels = make(map[string]string)
72-
}
73-
j.Labels[kueueconstants.QueueLabel] = q
74-
return j
75-
}
76-
77-
func (j *JobWrapper) Parallelism(p int32) *JobWrapper {
78-
j.Spec.Parallelism = ptr.To(p)
79-
return j
80-
}
81-
82-
func (j *JobWrapper) PodAnnotation(k, v string) *JobWrapper {
83-
if j.Spec.Template.Annotations == nil {
84-
j.Spec.Template.Annotations = make(map[string]string)
85-
}
86-
j.Spec.Template.Annotations[k] = v
87-
return j
88-
}
89-
90-
func (j *JobWrapper) NodeSelector(k, v string) *JobWrapper {
91-
if j.Spec.Template.Spec.NodeSelector == nil {
92-
j.Spec.Template.Spec.NodeSelector = make(map[string]string)
93-
}
94-
j.Spec.Template.Spec.NodeSelector[k] = v
95-
return j
96-
}
97-
98-
func (j *JobWrapper) Request(r corev1.ResourceName, v string) *JobWrapper {
99-
if j.Spec.Template.Spec.Containers[0].Resources.Limits == nil {
100-
j.Spec.Template.Spec.Containers[0].Resources.Limits = make(corev1.ResourceList)
101-
}
102-
j.Spec.Template.Spec.Containers[0].Resources.Limits[r] = resource.MustParse(v)
103-
return j
104-
}
105-
106-
func (j *JobWrapper) NodeAffinity(key string, values []string) *JobWrapper {
107-
core.AddNodeAffinity(&j.Spec.Template, key, corev1.NodeSelectorOpIn, values)
108-
return j
109-
}
110-
11133
func TestJobDefault(t *testing.T) {
11234
const (
11335
baseJobName = "job"
@@ -120,71 +42,71 @@ func TestJobDefault(t *testing.T) {
12042
wantErr error
12143
}{
12244
"no queue label": {
123-
job: MakeJob(baseJobName, baseNamespace).
45+
job: testingjob.MakeJob(baseJobName, baseNamespace).
12446
Parallelism(48).
12547
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
12648
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
12749
Obj(),
128-
wantJob: MakeJob(baseJobName, baseNamespace).
50+
wantJob: testingjob.MakeJob(baseJobName, baseNamespace).
12951
Parallelism(48).
13052
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
13153
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
13254
Obj(),
13355
},
13456
"no tpu topology annotation": {
135-
job: MakeJob(baseJobName, baseNamespace).
57+
job: testingjob.MakeJob(baseJobName, baseNamespace).
13658
Queue("queue-name").
13759
Parallelism(48).
13860
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
13961
Obj(),
140-
wantJob: MakeJob(baseJobName, baseNamespace).
62+
wantJob: testingjob.MakeJob(baseJobName, baseNamespace).
14163
Queue("queue-name").
14264
Parallelism(48).
14365
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
14466
Obj(),
14567
},
14668
"no tpu accelerator node selector label": {
147-
job: MakeJob(baseJobName, baseNamespace).
69+
job: testingjob.MakeJob(baseJobName, baseNamespace).
14870
Queue("queue-name").
14971
Parallelism(48).
15072
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
15173
Obj(),
152-
wantJob: MakeJob(baseJobName, baseNamespace).
74+
wantJob: testingjob.MakeJob(baseJobName, baseNamespace).
15375
Queue("queue-name").
15476
Parallelism(48).
15577
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
15678
Obj(),
15779
},
15880
"should set default values": {
159-
job: MakeJob(baseJobName, baseNamespace).
81+
job: testingjob.MakeJob(baseJobName, baseNamespace).
16082
Queue("queue-name").
16183
Parallelism(48).
16284
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
16385
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
164-
Request(core.TPUResourceName, "4").
86+
Limit(core.TPUResourceName, "4").
16587
Obj(),
166-
wantJob: MakeJob(baseJobName, baseNamespace).
88+
wantJob: testingjob.MakeJob(baseJobName, baseNamespace).
16789
Queue("queue-name").
16890
Parallelism(48).
16991
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
17092
PodAnnotation(kueue.PodSetRequiredTopologyAnnotation, core.TPUBlockLabel).
17193
PodAnnotation(kueue.PodSetSliceRequiredTopologyAnnotation, core.TPUSubBlockLabel).
17294
PodAnnotation(kueue.PodSetSliceSizeAnnotation, "16").
17395
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
174-
Request(core.TPUResourceName, "4").
96+
Limit(core.TPUResourceName, "4").
17597
NodeAffinity(core.TPUSliceHealthNodeSelectorKey, []string{core.TPUSliceHealthNodeSelectorHealthy}).
17698
Obj(),
17799
},
178100
"should respect existing NodeSelector for health": {
179-
job: MakeJob(baseJobName, baseNamespace).
101+
job: testingjob.MakeJob(baseJobName, baseNamespace).
180102
Queue("queue-name").
181103
Parallelism(48).
182104
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
183105
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
184106
NodeSelector(core.TPUSliceHealthNodeSelectorKey, "HEALTHY").
185-
Request(core.TPUResourceName, "4").
107+
Limit(core.TPUResourceName, "4").
186108
Obj(),
187-
wantJob: MakeJob(baseJobName, baseNamespace).
109+
wantJob: testingjob.MakeJob(baseJobName, baseNamespace).
188110
Queue("queue-name").
189111
Parallelism(48).
190112
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
@@ -193,19 +115,19 @@ func TestJobDefault(t *testing.T) {
193115
PodAnnotation(kueue.PodSetSliceSizeAnnotation, "16").
194116
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
195117
NodeSelector(core.TPUSliceHealthNodeSelectorKey, "HEALTHY").
196-
Request(core.TPUResourceName, "4").
118+
Limit(core.TPUResourceName, "4").
197119
Obj(),
198120
},
199121
"should respect existing NodeAffinity for health": {
200-
job: MakeJob(baseJobName, baseNamespace).
122+
job: testingjob.MakeJob(baseJobName, baseNamespace).
201123
Queue("queue-name").
202124
Parallelism(48).
203125
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
204126
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
205127
NodeAffinity(core.TPUSliceHealthNodeSelectorKey, []string{"HEALTHY", "DEGRADED"}).
206-
Request(core.TPUResourceName, "4").
128+
Limit(core.TPUResourceName, "4").
207129
Obj(),
208-
wantJob: MakeJob(baseJobName, baseNamespace).
130+
wantJob: testingjob.MakeJob(baseJobName, baseNamespace).
209131
Queue("queue-name").
210132
Parallelism(48).
211133
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x12").
@@ -214,16 +136,16 @@ func TestJobDefault(t *testing.T) {
214136
PodAnnotation(kueue.PodSetSliceSizeAnnotation, "16").
215137
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
216138
NodeAffinity(core.TPUSliceHealthNodeSelectorKey, []string{"HEALTHY", "DEGRADED"}).
217-
Request(core.TPUResourceName, "4").
139+
Limit(core.TPUResourceName, "4").
218140
Obj(),
219141
},
220142
"should reject incorrectly configured job not utilizing entire cube; single cube": {
221-
job: MakeJob(baseJobName, baseNamespace).
143+
job: testingjob.MakeJob(baseJobName, baseNamespace).
222144
Queue("queue-name").
223145
Parallelism(16).
224146
PodAnnotation(core.TPUSliceTopologyAnnotation, "4x4x4").
225147
NodeSelector("cloud.google.com/gke-tpu-accelerator", string(slice.TypeTpu7x)).
226-
Request(core.TPUResourceName, "1").
148+
Limit(core.TPUResourceName, "1").
227149
Obj(),
228150
wantErr: errors.New("invalid job \"job\": configuration results in 16 TPUs requested per cube, but must be exactly 64 TPUs (full utilization)"),
229151
},

slice/test/utils/e2e.go

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,12 @@ func waitForOperatorAvailability(ctx context.Context, k8sClient client.Client, k
120120
ginkgo.GinkgoLogr.Info("Deployment is available in the cluster", "deployment", key, "waitingTime", time.Since(waitForAvailableStart))
121121
}
122122

123-
func LabelNodesWithTopology(ctx context.Context, k8sClient client.Client, topology string, nodeNames []string) {
123+
func LabelNodes(ctx context.Context, k8sClient client.Client, key, value string, nodeNames []string) {
124124
for _, nodeName := range nodeNames {
125125
gomega.Eventually(func(g gomega.Gomega) {
126126
n := &corev1.Node{}
127127
g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: nodeName}, n)).To(gomega.Succeed())
128-
metav1.SetMetaDataLabel(&n.ObjectMeta, core.TPUTopologyAnnotation, topology)
128+
metav1.SetMetaDataLabel(&n.ObjectMeta, key, value)
129129
g.Expect(k8sClient.Update(ctx, n)).To(gomega.Succeed())
130130
}, Timeout, Interval).Should(gomega.Succeed())
131131
}
@@ -134,7 +134,7 @@ func LabelNodesWithTopology(ctx context.Context, k8sClient client.Client, topolo
134134
gomega.Eventually(func(g gomega.Gomega) {
135135
n := &corev1.Node{}
136136
g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: nodeName}, n)).To(gomega.Succeed())
137-
delete(n.Labels, core.TPUTopologyAnnotation)
137+
delete(n.Labels, key)
138138
g.Expect(k8sClient.Update(ctx, n)).To(gomega.Succeed())
139139
}, Timeout, Interval).Should(gomega.Succeed())
140140
}
@@ -166,45 +166,27 @@ func SetSliceReady(ctx context.Context, k8sClient client.Client, sliceKey client
166166
nodeNames = append(nodeNames, node.Name)
167167
}
168168
}
169-
LabelNodesWithTopology(ctx, k8sClient, topology, nodeNames)
170-
LabelNodesWithSlice(ctx, k8sClient, createdSlice.Name, nodeNames)
169+
LabelNodes(ctx, k8sClient, core.TPUTopologyAnnotation, topology, nodeNames)
170+
LabelNodes(ctx, k8sClient, core.TPUSliceNodeLabel, createdSlice.Name, nodeNames)
171171
}
172172

173-
func ExpectJobSetHasAntiAffinity(js *jobset.JobSet) {
173+
func expectAntiAffinity(template *corev1.PodTemplateSpec, name, kind string) {
174174
ginkgo.GinkgoHelper()
175-
for _, replicatedJob := range js.Spec.ReplicatedJobs {
176-
req := core.FindNodeAffinityRequirement(&replicatedJob.Template.Spec.Template, core.TPUSliceNodeLabel)
177-
gomega.ExpectWithOffset(1, req).ShouldNot(gomega.BeNil(), "replicated job %q should have anti-affinity for key %s", replicatedJob.Name, core.TPUSliceNodeLabel)
178-
gomega.ExpectWithOffset(1, req.Operator).Should(gomega.Equal(corev1.NodeSelectorOpDoesNotExist))
179-
}
175+
req := core.FindNodeAffinityRequirement(template, core.TPUSliceNodeLabel)
176+
gomega.Expect(req).ShouldNot(gomega.BeNil(), "%s %q should have anti-affinity for key %s", kind, name, core.TPUSliceNodeLabel)
177+
gomega.Expect(req.Operator).Should(gomega.Equal(corev1.NodeSelectorOpDoesNotExist))
180178
}
181179

182-
func LabelNodesWithSlice(ctx context.Context, k8sClient client.Client, sliceName string, nodeNames []string) {
183-
for _, nodeName := range nodeNames {
184-
gomega.Eventually(func(g gomega.Gomega) {
185-
n := &corev1.Node{}
186-
g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: nodeName}, n)).To(gomega.Succeed())
187-
metav1.SetMetaDataLabel(&n.ObjectMeta, core.TPUSliceNodeLabel, sliceName)
188-
g.Expect(k8sClient.Update(ctx, n)).To(gomega.Succeed())
189-
}, Timeout, Interval).Should(gomega.Succeed())
180+
func ExpectJobSetHasAntiAffinity(js *jobset.JobSet) {
181+
ginkgo.GinkgoHelper()
182+
for _, replicatedJob := range js.Spec.ReplicatedJobs {
183+
expectAntiAffinity(&replicatedJob.Template.Spec.Template, replicatedJob.Name, "replicated job")
190184
}
191-
ginkgo.DeferCleanup(func(ctx context.Context, k8sClient client.Client, nodeNames []string) {
192-
for _, nodeName := range nodeNames {
193-
gomega.Eventually(func(g gomega.Gomega) {
194-
n := &corev1.Node{}
195-
g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: nodeName}, n)).To(gomega.Succeed())
196-
delete(n.Labels, core.TPUSliceNodeLabel)
197-
g.Expect(k8sClient.Update(ctx, n)).To(gomega.Succeed())
198-
}, Timeout, Interval).Should(gomega.Succeed())
199-
}
200-
}, k8sClient, nodeNames)
201185
}
202186

203187
func ExpectWorkloadHasAntiAffinity(wl *kueue.Workload) {
204188
ginkgo.GinkgoHelper()
205189
for _, ps := range wl.Spec.PodSets {
206-
req := core.FindNodeAffinityRequirement(&ps.Template, core.TPUSliceNodeLabel)
207-
gomega.ExpectWithOffset(1, req).ShouldNot(gomega.BeNil(), "podset %q should have anti-affinity for key %s", ps.Name, core.TPUSliceNodeLabel)
208-
gomega.ExpectWithOffset(1, req.Operator).Should(gomega.Equal(corev1.NodeSelectorOpDoesNotExist))
190+
expectAntiAffinity(&ps.Template, string(ps.Name), "podset")
209191
}
210192
}

0 commit comments

Comments
 (0)