@@ -1478,144 +1478,6 @@ var _ = ginkgo.Describe("JobSet", func() {
14781478 })
14791479 })
14801480
1481- ginkgo .It ("should only admit the workload if nodes are not used in some other slices" , func () {
1482- nodes := & corev1.NodeList {}
1483- gomega .Expect (k8sClient .List (ctx , nodes , client.HasLabels {core .TPUSubBlockLabel })).To (gomega .Succeed ())
1484- var partitionIDs []string
1485- for _ , node := range nodes .Items {
1486- partitionIDs = append (partitionIDs , node .Labels [core .TPUSubBlockLabel ])
1487- }
1488- gomega .Expect (partitionIDs ).ToNot (gomega .BeEmpty ())
1489- manualSlice := & slice.Slice {
1490- ObjectMeta : metav1.ObjectMeta {
1491- Name : "manual-slice-blocking" ,
1492- },
1493- Spec : slice.SliceSpec {
1494- Type : slice .TypeTpu7x ,
1495- Topology : "4x4x4" ,
1496- PartitionIds : partitionIDs ,
1497- },
1498- }
1499- utils .MustCreate (ctx , k8sClient , manualSlice )
1500- utils .SetSliceReady (ctx , k8sClient , client .ObjectKeyFromObject (manualSlice ), manualSlice .Spec .Topology )
1501-
1502- jobSet := testingjobsjobset .MakeJobSet ("jobset" , ns .Name ).
1503- Queue (lq .Name ).
1504- ReplicatedJobs (
1505- testingjobsjobset.ReplicatedJobRequirements {
1506- Name : "rj1" ,
1507- Image : utils .E2eTestAgnHostImage ,
1508- Args : utils .BehaviorWaitForDeletion ,
1509- Replicas : 1 ,
1510- Parallelism : 16 ,
1511- Completions : 16 ,
1512- PodAnnotations : map [string ]string {
1513- core .TPUSliceTopologyAnnotation : "4x4x4" ,
1514- },
1515- NodeSelector : map [string ]string {
1516- core .TPUAcceleratorLabel : string (slice .TypeTpu7x ),
1517- core .TPUSliceHealthNodeSelectorKey : core .TPUSliceHealthNodeSelectorHealthy ,
1518- },
1519- },
1520- ).
1521- RequestAndLimit ("rj1" , core .TPUResourceName , "4" ).
1522- Obj ()
1523-
1524- ginkgo .By ("Creating a JobSet" , func () {
1525- utils .MustCreate (ctx , k8sClient , jobSet )
1526- })
1527-
1528- createdWorkload := & kueue.Workload {}
1529- wlKey := types.NamespacedName {
1530- Name : jobsetcontroller .GetWorkloadNameForJobSet (jobSet .Name , jobSet .UID ),
1531- Namespace : ns .Name ,
1532- }
1533-
1534- ginkgo .By ("Check that the Workload is not admissible" , func () {
1535- gomega .Consistently (func (g gomega.Gomega ) {
1536- err := k8sClient .Get (ctx , wlKey , createdWorkload )
1537- if err != nil {
1538- g .Expect (client .IgnoreNotFound (err )).To (gomega .Succeed ())
1539- } else {
1540- g .Expect (createdWorkload .Status .Admission ).Should (gomega .BeNil ())
1541- }
1542- }, utils .ConsistentDuration , utils .Interval ).Should (gomega .Succeed ())
1543- })
1544-
1545- ginkgo .By ("Deleting the manual slice and unlabeling nodes" , func () {
1546- utils .ExpectObjectToBeDeleted (ctx , k8sClient , manualSlice , true )
1547- for _ , node := range nodes .Items {
1548- gomega .Eventually (func (g gomega.Gomega ) {
1549- n := & corev1.Node {}
1550- g .Expect (k8sClient .Get (ctx , client .ObjectKeyFromObject (& node ), n )).To (gomega .Succeed ())
1551- delete (n .Labels , core .TPUSliceNodeLabel )
1552- delete (n .Labels , core .TPUTopologyAnnotation )
1553- g .Expect (k8sClient .Update (ctx , n )).To (gomega .Succeed ())
1554- }, utils .Timeout , utils .Interval ).Should (gomega .Succeed ())
1555- }
1556- })
1557-
1558- ginkgo .By ("Waiting for Admission of the Workload" , func () {
1559- gomega .Eventually (func (g gomega.Gomega ) {
1560- g .Expect (k8sClient .Get (ctx , wlKey , createdWorkload )).Should (gomega .Succeed ())
1561- g .Expect (createdWorkload .Status .Admission ).ShouldNot (gomega .BeNil ())
1562- }, utils .Timeout , utils .Interval ).Should (gomega .Succeed ())
1563- })
1564-
1565- createdSlice := & slice.Slice {}
1566- sliceKey := core .SliceKeyFromWorkload (createdWorkload , "rj1" , 0 )
1567-
1568- ginkgo .By ("Checking that Slice is created" , func () {
1569- gomega .Eventually (func (g gomega.Gomega ) {
1570- g .Expect (k8sClient .Get (ctx , sliceKey , createdSlice )).To (gomega .Succeed ())
1571- g .Expect (createdSlice .Spec .PartitionIds ).To (gomega .HaveLen (1 ))
1572- g .Expect (createdSlice .Spec .Topology ).To (gomega .Equal ("4x4x4" ))
1573- g .Expect (createdSlice .Spec .Type ).To (gomega .Equal (slice .TypeTpu7x ))
1574- }, utils .Timeout , utils .Interval ).Should (gomega .Succeed ())
1575- })
1576-
1577- ginkgo .By ("Adding Ready condition" , func () {
1578- utils .SetSliceReady (ctx , k8sClient , sliceKey , "4x4x4" )
1579- })
1580-
1581- ginkgo .By ("Checking that the Workload is admitted and admission check status is ready" , func () {
1582- gomega .Eventually (func (g gomega.Gomega ) {
1583- g .Expect (k8sClient .Get (ctx , wlKey , createdWorkload )).Should (gomega .Succeed ())
1584- g .Expect (workload .IsAdmitted (createdWorkload )).Should (gomega .BeTrue ())
1585- g .Expect (createdWorkload .Status .AdmissionChecks ).Should (gomega .BeComparableTo ([]kueue.AdmissionCheckState {{
1586- Name : kueue .AdmissionCheckReference (ac .Name ),
1587- State : kueue .CheckStateReady ,
1588- Message : `Slices are in states: 1 ACTIVE` ,
1589- }}, cmpopts .IgnoreFields (kueue.AdmissionCheckState {}, "LastTransitionTime" , "PodSetUpdates" )))
1590- }, utils .LongTimeout , utils .Timeout ).Should (gomega .Succeed ())
1591- })
1592-
1593- ginkgo .By ("Checking that all pods are running with topology node selector and without anti-affinity" , func () {
1594- pods := & corev1.PodList {}
1595- gomega .Eventually (func (g gomega.Gomega ) {
1596- g .Expect (k8sClient .List (ctx , pods , client .InNamespace (ns .Name ))).To (gomega .Succeed ())
1597- g .Expect (pods .Items ).Should (gomega .HaveLen (int (16 )))
1598- for _ , pod := range pods .Items {
1599- g .Expect (pod .Spec .NodeSelector ).To (gomega .HaveKeyWithValue (core .TPUTopologyAnnotation , "4x4x4" ))
1600- g .Expect (pod .Spec .Affinity ).To (gomega .BeNil ())
1601- g .Expect (pod .Status .Phase ).To (gomega .Equal (corev1 .PodRunning ))
1602- }
1603- }, utils .LongTimeout , utils .Interval ).Should (gomega .Succeed ())
1604- })
1605-
1606- ginkgo .By ("Deleting JobSet" , func () {
1607- utils .ExpectObjectToBeDeleted (ctx , k8sClient , jobSet , true )
1608- })
1609-
1610- ginkgo .By ("Checking that Slice is deleted" , func () {
1611- utils .ExpectObjectToBeDeleted (ctx , k8sClient , createdSlice , false )
1612- })
1613-
1614- ginkgo .By ("Checking that Workload is deleted" , func () {
1615- utils .ExpectObjectToBeDeleted (ctx , k8sClient , createdWorkload , false )
1616- })
1617- })
1618-
16191481 ginkgo .It ("should handle mixed tolerance for degraded slices across multiple PodSets" , func () {
16201482 jobSet := testingjobsjobset .MakeJobSet ("jobset" , ns .Name ).
16211483 Queue (lq .Name ).
0 commit comments