4343from dstack ._internal .server .services .fleets import (
4444 create_fleet_instance_model ,
4545 emit_fleet_status_change_event ,
46+ get_fleet_requirements ,
4647 get_fleet_spec ,
4748 get_next_instance_num ,
4849 is_fleet_empty ,
4950 is_fleet_in_use ,
5051)
52+ from dstack ._internal .server .services .instances import instance_matches_constraints
5153from dstack ._internal .server .services .locking import get_locker
5254from dstack ._internal .server .services .pipelines import PipelineHinterProtocol
5355from dstack ._internal .server .utils import sentry_utils
@@ -313,6 +315,7 @@ async def _refetch_locked_fleet_for_lock_decision(
313315 FleetModel .consolidation_attempt ,
314316 FleetModel .last_consolidated_at ,
315317 FleetModel .last_processed_at ,
318+ FleetModel .created_at ,
316319 )
317320 )
318321 .execution_options (populate_existing = True )
@@ -538,25 +541,36 @@ def _consolidate_fleet_state_with_spec(
538541 consolidation_instances : Sequence [InstanceModel ],
539542) -> _ProcessResult :
540543 result = _ProcessResult ()
541- maintain_nodes_result = _maintain_fleet_nodes_in_min_max_range (
544+
545+ spec_mismatch_updates = _terminate_instances_not_matching_fleet_spec (
542546 instances = consolidation_instances ,
543547 fleet_spec = consolidation_fleet_spec ,
544548 )
549+ if spec_mismatch_updates :
550+ result .instance_id_to_update_map .update (spec_mismatch_updates )
551+
552+ # Exclude spec-mismatched instances so min/max check sees only compatible instances.
553+ effective_instances = [i for i in consolidation_instances if i .id not in spec_mismatch_updates ]
554+
555+ maintain_nodes_result = _maintain_fleet_nodes_in_min_max_range (
556+ instances = effective_instances ,
557+ fleet_spec = consolidation_fleet_spec ,
558+ )
545559 if maintain_nodes_result .has_changes :
546- result .instance_id_to_update_map = maintain_nodes_result .instance_id_to_update_map
560+ result .instance_id_to_update_map . update ( maintain_nodes_result .instance_id_to_update_map )
547561 result .new_instance_creates = maintain_nodes_result .new_instance_creates
548- if maintain_nodes_result .changes_required :
562+ if len ( spec_mismatch_updates ) > 0 or maintain_nodes_result .changes_required :
549563 result .fleet_update_map ["consolidation_attempt" ] = fleet_model .consolidation_attempt + 1
550564 else :
551- # The fleet is consolidated with respect to nodes min/max.
565+ # The fleet is consolidated with respect to spec and nodes min/max.
552566 result .fleet_update_map ["consolidation_attempt" ] = 0
553567 result .fleet_update_map ["last_consolidated_at" ] = NOW_PLACEHOLDER
554568 return result
555569
556570
def _is_fleet_ready_for_consolidation(fleet_model: FleetModel) -> bool:
    """Tell whether enough time has passed to attempt consolidation again.

    The required wait is looked up from the fleet's current
    ``consolidation_attempt``. Elapsed time is measured from
    ``last_consolidated_at``; when that value is falsy (e.g. ``None``),
    the fleet's ``created_at`` is used as the reference point instead.
    """
    required_delay = _get_consolidation_retry_delay(fleet_model.consolidation_attempt)
    reference_time = fleet_model.last_consolidated_at or fleet_model.created_at
    elapsed = get_current_datetime() - reference_time
    return elapsed >= required_delay
562576
@@ -579,6 +593,47 @@ def _get_consolidation_retry_delay(consolidation_attempt: int) -> timedelta:
579593 return _CONSOLIDATION_RETRY_DELAYS [- 1 ]
580594
581595
def _terminate_instances_not_matching_fleet_spec(
    instances: Sequence[InstanceModel],
    fleet_spec: FleetSpec,
) -> dict[uuid.UUID, _InstanceUpdateMap]:
    """Build termination updates for instances that do not match ``fleet_spec``.

    An instance is included only when it passes
    ``_can_terminate_spec_mismatched_instance`` and fails
    ``_instance_matches_fleet_spec``. Nothing is mutated here: the caller
    receives a mapping from instance id to the field updates to apply.

    Returns:
        A dict keyed by instance id; empty when every eligible instance
        matches the spec.
    """
    # The eligibility check runs first so that ineligible instances are
    # never compared against the spec; each entry gets its own update dict.
    return {
        candidate.id: {
            "status": InstanceStatus.TERMINATING,
            "termination_reason": InstanceTerminationReason.FLEET_SPEC_MISMATCH,
            "termination_reason_message": "Instance does not match fleet spec",
        }
        for candidate in instances
        if _can_terminate_spec_mismatched_instance(candidate)
        and not _instance_matches_fleet_spec(candidate, fleet_spec)
    }
611+
612+
def _can_terminate_spec_mismatched_instance(instance: InstanceModel) -> bool:
    """Return True if ``instance`` may be terminated for a fleet spec mismatch.

    Deleted instances are never eligible. Otherwise only instances whose
    status is ``IDLE`` or ``PROVISIONING`` qualify.
    """
    # Pending instances have not selected an offer yet, so InstancePipeline will
    # provision them using the current fleet spec. Only instances already tied
    # to the old spec are recycled.
    return not instance.deleted and instance.status in (
        InstanceStatus.IDLE,
        InstanceStatus.PROVISIONING,
    )
619+
620+
def _instance_matches_fleet_spec(instance: InstanceModel, fleet_spec: FleetSpec) -> bool:
    """Check ``instance`` against the constraints derived from ``fleet_spec``.

    An instance without an offer is always treated as matching. Otherwise the
    result of ``instance_matches_constraints`` is returned, using the backends,
    regions, instance types and availability zones from the spec's merged
    profile together with the fleet requirements.
    """
    if instance.offer is not None:
        merged_profile = fleet_spec.merged_profile
        fleet_requirements = get_fleet_requirements(fleet_spec)
        return instance_matches_constraints(
            instance,
            backend_types=merged_profile.backends,
            regions=merged_profile.regions,
            instance_types=merged_profile.instance_types,
            zones=merged_profile.availability_zones,
            requirements=fleet_requirements,
        )
    # Not yet provisioned — will be provisioned using the current (updated) spec.
    return True
635+
636+
582637def _maintain_fleet_nodes_in_min_max_range (
583638 instances : Sequence [InstanceModel ],
584639 fleet_spec : FleetSpec ,
0 commit comments