From 4e661c892e9cbd6ceb198dd7f0febc04931eed8d Mon Sep 17 00:00:00 2001 From: Lohit Kolluri Date: Thu, 28 May 2026 16:07:08 +0530 Subject: [PATCH] constraintenforcer: ignore completed tasks in reservation accounting Completed tasks (especially from replicated-job history) should not consume node reservations in the constraint enforcer. Add a regression test covering completed job tasks. Signed-off-by: Lohit Kolluri --- .../constraintenforcer/constraint_enforcer.go | 6 ++ .../constraint_enforcer_test.go | 76 +++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/manager/orchestrator/constraintenforcer/constraint_enforcer.go b/manager/orchestrator/constraintenforcer/constraint_enforcer.go index 296767852e..12fa40151f 100644 --- a/manager/orchestrator/constraintenforcer/constraint_enforcer.go +++ b/manager/orchestrator/constraintenforcer/constraint_enforcer.go @@ -118,6 +118,12 @@ loop: if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateCompleted { continue } + // Completed tasks should not consume node reservations. This is + // particularly important for replicated-job services, where tasks may + // remain with DesiredState=Completed after they finished. + if t.Status.State >= api.TaskStateCompleted { + continue + } // Ensure that the node still satisfies placement constraints. // NOTE: If the task is associacted with a service then we must use the diff --git a/manager/orchestrator/constraintenforcer/constraint_enforcer_test.go b/manager/orchestrator/constraintenforcer/constraint_enforcer_test.go index f63518b1aa..527af9d1d9 100644 --- a/manager/orchestrator/constraintenforcer/constraint_enforcer_test.go +++ b/manager/orchestrator/constraintenforcer/constraint_enforcer_test.go @@ -12,6 +12,82 @@ import ( "github.com/stretchr/testify/require" ) +func TestRejectNoncompliantTasksIgnoresCompletedJobTasksInReservations(t *testing.T) { + s := store.NewMemoryStore(nil) + require.NotNil(t, s) + t.Cleanup(func() { _ = s.Close() }) + + node := &api.Node{ + ID: "node1", + Spec: api.NodeSpec{ + Availability: api.NodeAvailabilityActive, + }, + Description: &api.NodeDescription{ + Resources: &api.Resources{ + MemoryBytes: 1024, + }, + }, + } + + // A live task that fits if completed job tasks are ignored. + runningTask := &api.Task{ + ID: "running1", + NodeID: node.ID, + ServiceID: "svc1", + DesiredState: api.TaskStateRunning, + Status: api.TaskStatus{ + State: api.TaskStateRunning, + }, + Spec: api.TaskSpec{ + Resources: &api.ResourceRequirements{ + Reservations: &api.Resources{ + MemoryBytes: 700, + }, + }, + }, + } + + // A completed replicated-job task that should not consume reservations. + completedJobTask := &api.Task{ + ID: "job1", + NodeID: node.ID, + ServiceID: "jobsvc", + DesiredState: api.TaskStateCompleted, + Status: api.TaskStatus{ + State: api.TaskStateCompleted, + }, + Spec: api.TaskSpec{ + Resources: &api.ResourceRequirements{ + Reservations: &api.Resources{ + MemoryBytes: 700, + }, + }, + }, + } + + require.NoError(t, s.Update(func(tx store.Tx) error { + if err := store.CreateNode(tx, node); err != nil { + return err + } + if err := store.CreateTask(tx, runningTask); err != nil { + return err + } + if err := store.CreateTask(tx, completedJobTask); err != nil { + return err + } + return nil + })) + + ce := New(s) + ce.rejectNoncompliantTasks(node) + + s.View(func(tx store.ReadTx) { + got := store.GetTask(tx, runningTask.ID) + require.NotNil(t, got) + assert.NotEqual(t, api.TaskStateRejected, got.Status.State, "running task unexpectedly rejected; completed job tasks should not count toward reservations") + }) +} + func TestConstraintEnforcer(t *testing.T) { nodes := []*api.Node{ // this node starts as a worker, but then is changed to a manager.