@@ -806,30 +806,45 @@ func (r *WorkloadReconciler) syncAdmissionCheckStatus(ctx context.Context, wl *k
806806 return nil
807807}
808808
809+ func calculateEffectiveSliceCounts (slicesByState map [core.SliceState ][]v1beta1.Slice , wl * kueue.Workload , podSetRequiresHealthy map [string ]bool ) (int , int ) {
810+ effectiveActiveCount := len (slicesByState [core .SliceStateActive ])
811+ effectiveFailedCount := len (slicesByState [core .SliceStateFailed ])
812+
813+ if features .Enabled (features .FailOnUntoleratedDegradedSlice ) {
814+ for _ , slice := range slicesByState [core .SliceStateActiveDegraded ] {
815+ psName := slice .Annotations [core .OwnerPodSetNameAnnotation ]
816+ if healthySliceRequired (psName , podSetRequiresHealthy , wl ) {
817+ effectiveFailedCount ++
818+ } else {
819+ effectiveActiveCount ++
820+ }
821+ }
822+ } else {
823+ effectiveActiveCount += len (slicesByState [core .SliceStateActiveDegraded ])
824+ }
825+ return effectiveActiveCount , effectiveFailedCount
826+ }
827+
809828func (r * WorkloadReconciler ) prepareAdmissionCheckStatus (ctx context.Context , wl * kueue.Workload , ac * kueue.AdmissionCheckState , slices []v1beta1.Slice , desiredSlicesCount int ) {
810829 log := ctrl .LoggerFrom (ctx ).V (2 )
811830 // wait for Kueue to reset check to Pending after eviction
812831 if ac .State == kueue .CheckStateRetry {
813832 return
814833 }
815834 slicesByState := core .GroupSlicesByState (slices , r .activationTimeout )
835+ podSetRequiresHealthy := make (map [string ]bool )
836+ if features .Enabled (features .FailOnUntoleratedDegradedSlice ) {
837+ for _ , ps := range wl .Spec .PodSets {
838+ podSetRequiresHealthy [string (ps .Name )] = podSetRequestedOnlyHealthySlices (ps )
839+ }
840+ }
841+ effectiveActiveCount , effectiveFailedCount := calculateEffectiveSliceCounts (slicesByState , wl , podSetRequiresHealthy )
816842
817843 switch {
818- case desiredSlicesCount == len ( slicesByState [ core . SliceStateActive ]) + len ( slicesByState [ core . SliceStateActiveDegraded ]) :
844+ case desiredSlicesCount == effectiveActiveCount :
819845 ac .State = kueue .CheckStateReady
820- var podSetUpdates []kueue.PodSetUpdate
821- for _ , ps := range wl .Spec .PodSets {
822- if topology := core .GetTPUTopology (ps .Template ); topology != "" {
823- podSetUpdates = append (podSetUpdates , kueue.PodSetUpdate {
824- Name : ps .Name ,
825- NodeSelector : map [string ]string {
826- core .TPUTopologyAnnotation : topology ,
827- },
828- })
829- }
830- }
831- ac .PodSetUpdates = podSetUpdates
832- case len (slicesByState [core .SliceStateFailed ]) > 0 :
846+ ac .PodSetUpdates = buildPodSetUpdates (wl )
847+ case effectiveFailedCount > 0 :
833848 ac .State = kueue .CheckStateRetry
834849 ac .RequeueAfterSeconds = ptr .To (int32 (r .retryDelayOnSliceFailure .Round (time .Second ).Seconds ()))
835850 case (features .Enabled (features .UseRetryMechanismForSliceCreation ) && len (slicesByState [core .SliceStateStale ]) > 0 ):
@@ -844,29 +859,89 @@ func (r *WorkloadReconciler) prepareAdmissionCheckStatus(ctx context.Context, wl
844859 default :
845860 ac .State = kueue .CheckStatePending
846861 }
862+ ac .Message = buildAdmissionCheckMessage (slicesByState , effectiveFailedCount , wl , podSetRequiresHealthy )
863+ }
847864
865+ func buildPodSetUpdates (wl * kueue.Workload ) []kueue.PodSetUpdate {
866+ var podSetUpdates []kueue.PodSetUpdate
867+ for _ , ps := range wl .Spec .PodSets {
868+ if topology := core .GetTPUTopology (ps .Template ); topology != "" {
869+ podSetUpdates = append (podSetUpdates , kueue.PodSetUpdate {
870+ Name : ps .Name ,
871+ NodeSelector : map [string ]string {
872+ core .TPUTopologyAnnotation : topology ,
873+ },
874+ })
875+ }
876+ }
877+ return podSetUpdates
878+ }
879+
880+ func buildAdmissionCheckMessage (slicesByState map [core.SliceState ][]v1beta1.Slice , effectiveFailedCount int , wl * kueue.Workload , podSetRequiresHealthy map [string ]bool ) string {
848881 var stateMessages []string
849882 for _ , state := range core .SliceStates {
850883 if count := len (slicesByState [state ]); count > 0 {
851884 stateMessages = append (stateMessages , fmt .Sprintf ("%d %s" , count , state ))
852885 }
853886 }
854887
855- if len (stateMessages ) > 0 {
856- ac .Message = fmt .Sprintf ("Slices are in states: %s" , strings .Join (stateMessages , ", " ))
888+ var message string
889+ if len (stateMessages ) == 0 {
890+ message = "Waiting for Slices to be created"
857891 } else {
858- ac . Message = "Waiting for Slices to be created"
892+ message = fmt . Sprintf ( "Slices are in states: %s" , strings . Join ( stateMessages , ", " ))
859893 }
860894
861- if len ( slicesByState [ core . SliceStateFailed ]) > 0 {
895+ if effectiveFailedCount > 0 {
862896 var errMessages []string
863897 for _ , slice := range slicesByState [core .SliceStateFailed ] {
864898 cond := meta .FindStatusCondition (slice .Status .Conditions , v1beta1 .SliceStateConditionType )
865- errMessages = append (errMessages , cond .Message )
899+ if cond != nil {
900+ errMessages = append (errMessages , cond .Message )
901+ }
866902 }
867- ac .Message += ". Errors: " + strings .Join (errMessages , "; " )
903+ if features .Enabled (features .FailOnUntoleratedDegradedSlice ) {
904+ for _ , slice := range slicesByState [core .SliceStateActiveDegraded ] {
905+ psName := slice .Annotations [core .OwnerPodSetNameAnnotation ]
906+ if ! healthySliceRequired (psName , podSetRequiresHealthy , wl ) {
907+ continue
908+ }
909+ if cond := meta .FindStatusCondition (slice .Status .Conditions , v1beta1 .SliceStateConditionType ); cond != nil {
910+ errMessages = append (errMessages , fmt .Sprintf ("%s (degraded)" , cond .Message ))
911+ }
912+ }
913+ }
914+ message += ". Errors: " + strings .Join (errMessages , "; " )
868915 }
869- ac .Message = api .TruncateConditionMessage (ac .Message )
916+ return api .TruncateConditionMessage (message )
917+ }
918+
919+ // healthySliceRequired returns true if the given podset requires healthy slice
920+ // The second part of the condition (psName == "") is for backward
921+ // compatibility for slices created before the OwnerPodSetNameAnnotation was introduced.
922+ func healthySliceRequired (psName string , podSetRequiresHealthy map [string ]bool , wl * kueue.Workload ) bool {
923+ if psName != "" {
924+ return podSetRequiresHealthy [psName ]
925+ }
926+ return anyPodSetRequestedOnlyHealthySlices (wl )
927+ }
928+
929+ func anyPodSetRequestedOnlyHealthySlices (wl * kueue.Workload ) bool {
930+ for _ , ps := range wl .Spec .PodSets {
931+ // if a least one podset requested only healthy
932+ if podSetRequestedOnlyHealthySlices (ps ) {
933+ return true
934+ }
935+ }
936+ return false
937+ }
938+
939+ func podSetRequestedOnlyHealthySlices (ps kueue.PodSet ) bool {
940+ if v , ok := ps .Template .Spec .NodeSelector [core .TPUSliceHealthNodeSelectorKey ]; ok {
941+ return v == core .TPUSliceHealthNodeSelectorHealthy
942+ }
943+
944+ return ! core .NodeAffinityAllowsValue (ps .Template .Spec .Affinity , core .TPUSliceHealthNodeSelectorKey , core .TPUSliceHealthNodeSelectorDegraded )
870945}
871946
872947// SetupWithManager sets up the controller with the Manager.
0 commit comments