Skip to content

Commit e36f0ee

Browse files
authored
[slice] Evict if Slice is DEGRADED (and not tolerated) (#1123)
* Evict if degraded not tolerated
* Fix lint
* Move to opt-in
* Address comments
* Address comments
1 parent 8e8ba14 commit e36f0ee

10 files changed

Lines changed: 590 additions & 39 deletions

File tree

slice/config/dev/manager_config_patch.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@
88
- op: add
99
path: /spec/template/spec/containers/0/args/0
1010
value: --retry-delay-on-slice-failure=0s
11+
- op: add
12+
path: /spec/template/spec/containers/0/args/0
13+
value: --feature-gates=FailOnUntoleratedDegradedSlice=true

slice/internal/controller/workload_controller.go

Lines changed: 96 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -806,30 +806,45 @@ func (r *WorkloadReconciler) syncAdmissionCheckStatus(ctx context.Context, wl *k
806806
return nil
807807
}
808808

809+
func calculateEffectiveSliceCounts(slicesByState map[core.SliceState][]v1beta1.Slice, wl *kueue.Workload, podSetRequiresHealthy map[string]bool) (int, int) {
810+
effectiveActiveCount := len(slicesByState[core.SliceStateActive])
811+
effectiveFailedCount := len(slicesByState[core.SliceStateFailed])
812+
813+
if features.Enabled(features.FailOnUntoleratedDegradedSlice) {
814+
for _, slice := range slicesByState[core.SliceStateActiveDegraded] {
815+
psName := slice.Annotations[core.OwnerPodSetNameAnnotation]
816+
if healthySliceRequired(psName, podSetRequiresHealthy, wl) {
817+
effectiveFailedCount++
818+
} else {
819+
effectiveActiveCount++
820+
}
821+
}
822+
} else {
823+
effectiveActiveCount += len(slicesByState[core.SliceStateActiveDegraded])
824+
}
825+
return effectiveActiveCount, effectiveFailedCount
826+
}
827+
809828
func (r *WorkloadReconciler) prepareAdmissionCheckStatus(ctx context.Context, wl *kueue.Workload, ac *kueue.AdmissionCheckState, slices []v1beta1.Slice, desiredSlicesCount int) {
810829
log := ctrl.LoggerFrom(ctx).V(2)
811830
// wait for Kueue to reset check to Pending after eviction
812831
if ac.State == kueue.CheckStateRetry {
813832
return
814833
}
815834
slicesByState := core.GroupSlicesByState(slices, r.activationTimeout)
835+
podSetRequiresHealthy := make(map[string]bool)
836+
if features.Enabled(features.FailOnUntoleratedDegradedSlice) {
837+
for _, ps := range wl.Spec.PodSets {
838+
podSetRequiresHealthy[string(ps.Name)] = podSetRequestedOnlyHealthySlices(ps)
839+
}
840+
}
841+
effectiveActiveCount, effectiveFailedCount := calculateEffectiveSliceCounts(slicesByState, wl, podSetRequiresHealthy)
816842

817843
switch {
818-
case desiredSlicesCount == len(slicesByState[core.SliceStateActive])+len(slicesByState[core.SliceStateActiveDegraded]):
844+
case desiredSlicesCount == effectiveActiveCount:
819845
ac.State = kueue.CheckStateReady
820-
var podSetUpdates []kueue.PodSetUpdate
821-
for _, ps := range wl.Spec.PodSets {
822-
if topology := core.GetTPUTopology(ps.Template); topology != "" {
823-
podSetUpdates = append(podSetUpdates, kueue.PodSetUpdate{
824-
Name: ps.Name,
825-
NodeSelector: map[string]string{
826-
core.TPUTopologyAnnotation: topology,
827-
},
828-
})
829-
}
830-
}
831-
ac.PodSetUpdates = podSetUpdates
832-
case len(slicesByState[core.SliceStateFailed]) > 0:
846+
ac.PodSetUpdates = buildPodSetUpdates(wl)
847+
case effectiveFailedCount > 0:
833848
ac.State = kueue.CheckStateRetry
834849
ac.RequeueAfterSeconds = ptr.To(int32(r.retryDelayOnSliceFailure.Round(time.Second).Seconds()))
835850
case (features.Enabled(features.UseRetryMechanismForSliceCreation) && len(slicesByState[core.SliceStateStale]) > 0):
@@ -844,29 +859,89 @@ func (r *WorkloadReconciler) prepareAdmissionCheckStatus(ctx context.Context, wl
844859
default:
845860
ac.State = kueue.CheckStatePending
846861
}
862+
ac.Message = buildAdmissionCheckMessage(slicesByState, effectiveFailedCount, wl, podSetRequiresHealthy)
863+
}
847864

865+
func buildPodSetUpdates(wl *kueue.Workload) []kueue.PodSetUpdate {
866+
var podSetUpdates []kueue.PodSetUpdate
867+
for _, ps := range wl.Spec.PodSets {
868+
if topology := core.GetTPUTopology(ps.Template); topology != "" {
869+
podSetUpdates = append(podSetUpdates, kueue.PodSetUpdate{
870+
Name: ps.Name,
871+
NodeSelector: map[string]string{
872+
core.TPUTopologyAnnotation: topology,
873+
},
874+
})
875+
}
876+
}
877+
return podSetUpdates
878+
}
879+
880+
func buildAdmissionCheckMessage(slicesByState map[core.SliceState][]v1beta1.Slice, effectiveFailedCount int, wl *kueue.Workload, podSetRequiresHealthy map[string]bool) string {
848881
var stateMessages []string
849882
for _, state := range core.SliceStates {
850883
if count := len(slicesByState[state]); count > 0 {
851884
stateMessages = append(stateMessages, fmt.Sprintf("%d %s", count, state))
852885
}
853886
}
854887

855-
if len(stateMessages) > 0 {
856-
ac.Message = fmt.Sprintf("Slices are in states: %s", strings.Join(stateMessages, ", "))
888+
var message string
889+
if len(stateMessages) == 0 {
890+
message = "Waiting for Slices to be created"
857891
} else {
858-
ac.Message = "Waiting for Slices to be created"
892+
message = fmt.Sprintf("Slices are in states: %s", strings.Join(stateMessages, ", "))
859893
}
860894

861-
if len(slicesByState[core.SliceStateFailed]) > 0 {
895+
if effectiveFailedCount > 0 {
862896
var errMessages []string
863897
for _, slice := range slicesByState[core.SliceStateFailed] {
864898
cond := meta.FindStatusCondition(slice.Status.Conditions, v1beta1.SliceStateConditionType)
865-
errMessages = append(errMessages, cond.Message)
899+
if cond != nil {
900+
errMessages = append(errMessages, cond.Message)
901+
}
866902
}
867-
ac.Message += ". Errors: " + strings.Join(errMessages, "; ")
903+
if features.Enabled(features.FailOnUntoleratedDegradedSlice) {
904+
for _, slice := range slicesByState[core.SliceStateActiveDegraded] {
905+
psName := slice.Annotations[core.OwnerPodSetNameAnnotation]
906+
if !healthySliceRequired(psName, podSetRequiresHealthy, wl) {
907+
continue
908+
}
909+
if cond := meta.FindStatusCondition(slice.Status.Conditions, v1beta1.SliceStateConditionType); cond != nil {
910+
errMessages = append(errMessages, fmt.Sprintf("%s (degraded)", cond.Message))
911+
}
912+
}
913+
}
914+
message += ". Errors: " + strings.Join(errMessages, "; ")
868915
}
869-
ac.Message = api.TruncateConditionMessage(ac.Message)
916+
return api.TruncateConditionMessage(message)
917+
}
918+
919+
// healthySliceRequired returns true if the given podset requires healthy slice
920+
// The second part of the condition (psName == "") is for backward
921+
// compatibility for slices created before the OwnerPodSetNameAnnotation was introduced.
922+
func healthySliceRequired(psName string, podSetRequiresHealthy map[string]bool, wl *kueue.Workload) bool {
923+
if psName != "" {
924+
return podSetRequiresHealthy[psName]
925+
}
926+
return anyPodSetRequestedOnlyHealthySlices(wl)
927+
}
928+
929+
func anyPodSetRequestedOnlyHealthySlices(wl *kueue.Workload) bool {
930+
for _, ps := range wl.Spec.PodSets {
931+
// if a least one podset requested only healthy
932+
if podSetRequestedOnlyHealthySlices(ps) {
933+
return true
934+
}
935+
}
936+
return false
937+
}
938+
939+
func podSetRequestedOnlyHealthySlices(ps kueue.PodSet) bool {
940+
if v, ok := ps.Template.Spec.NodeSelector[core.TPUSliceHealthNodeSelectorKey]; ok {
941+
return v == core.TPUSliceHealthNodeSelectorHealthy
942+
}
943+
944+
return !core.NodeAffinityAllowsValue(ps.Template.Spec.Affinity, core.TPUSliceHealthNodeSelectorKey, core.TPUSliceHealthNodeSelectorDegraded)
870945
}
871946

872947
// SetupWithManager sets up the controller with the Manager.

0 commit comments

Comments
 (0)