Skip to content

Commit 76b9efe

Browse files
committed
fix: remove backend-wide k8s_max_concurrent_pods, replaced with dedicated limits for system-wide k8s quota on cpu and memory
1 parent a0d2e91 commit 76b9efe

6 files changed

Lines changed: 54 additions & 60 deletions

File tree

backend/app/services/k8s_worker/worker.py

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ def __init__(
6363

6464
# State tracking
6565
self._active_creations: set[str] = set()
66-
self._creation_semaphore = asyncio.Semaphore(self._settings.K8S_MAX_CONCURRENT_PODS)
6766

6867
self.logger.info(f"KubernetesWorker initialized for namespace {self._settings.K8S_NAMESPACE}")
6968

@@ -104,52 +103,51 @@ async def handle_delete_pod_command(self, command: DeletePodCommandEvent) -> Non
104103

105104
async def _create_pod_for_execution(self, command: CreatePodCommandEvent) -> None:
106105
"""Create pod for execution"""
107-
async with self._creation_semaphore:
108-
execution_id = command.execution_id
109-
self._active_creations.add(execution_id)
110-
self.metrics.update_active_pod_creations(len(self._active_creations))
106+
execution_id = command.execution_id
107+
self._active_creations.add(execution_id)
108+
self.metrics.update_active_pod_creations(len(self._active_creations))
111109

112-
start_time = time.time()
110+
start_time = time.time()
113111

114-
try:
115-
script_content = command.script
116-
entrypoint_content = await self._get_entrypoint_script()
112+
try:
113+
script_content = command.script
114+
entrypoint_content = await self._get_entrypoint_script()
117115

118-
# Create ConfigMap
119-
config_map = self.pod_builder.build_config_map(
120-
command=command, script_content=script_content, entrypoint_content=entrypoint_content
121-
)
116+
# Create ConfigMap
117+
config_map = self.pod_builder.build_config_map(
118+
command=command, script_content=script_content, entrypoint_content=entrypoint_content
119+
)
122120

123-
await self._create_config_map(config_map)
121+
await self._create_config_map(config_map)
124122

125-
pod = self.pod_builder.build_pod_manifest(command=command)
126-
created_pod = await self._create_pod(pod)
123+
pod = self.pod_builder.build_pod_manifest(command=command)
124+
created_pod = await self._create_pod(pod)
127125

128-
# Set ownerReference so K8s garbage-collects the ConfigMap when the pod is deleted
129-
if created_pod and created_pod.metadata and created_pod.metadata.uid:
130-
await self._set_configmap_owner(config_map, created_pod)
126+
# Set ownerReference so K8s garbage-collects the ConfigMap when the pod is deleted
127+
if created_pod and created_pod.metadata and created_pod.metadata.uid:
128+
await self._set_configmap_owner(config_map, created_pod)
131129

132-
# Publish PodCreated event
133-
await self._publish_pod_created(command, pod)
130+
# Publish PodCreated event
131+
await self._publish_pod_created(command, pod)
134132

135-
# Update metrics
136-
duration = time.time() - start_time
137-
self.metrics.record_k8s_pod_creation_duration(duration, command.language)
138-
self.metrics.record_k8s_pod_created("success", command.language)
133+
# Update metrics
134+
duration = time.time() - start_time
135+
self.metrics.record_k8s_pod_creation_duration(duration, command.language)
136+
self.metrics.record_k8s_pod_created("success", command.language)
139137

140-
self.logger.info(
141-
f"Successfully created pod {pod.metadata.name} for execution {execution_id}. "
142-
f"Duration: {duration:.2f}s"
143-
)
138+
self.logger.info(
139+
f"Successfully created pod {pod.metadata.name} for execution {execution_id}. "
140+
f"Duration: {duration:.2f}s"
141+
)
144142

145-
except Exception as e:
146-
self.logger.error(f"Failed to create pod for execution {execution_id}: {e}", exc_info=True)
147-
self.metrics.record_k8s_pod_created("failed", "unknown")
148-
await self._publish_pod_creation_failed(command, str(e))
143+
except Exception as e:
144+
self.logger.error(f"Failed to create pod for execution {execution_id}: {e}", exc_info=True)
145+
self.metrics.record_k8s_pod_created("failed", "unknown")
146+
await self._publish_pod_creation_failed(command, str(e))
149147

150-
finally:
151-
self._active_creations.discard(execution_id)
152-
self.metrics.update_active_pod_creations(len(self._active_creations))
148+
finally:
149+
self._active_creations.discard(execution_id)
150+
self.metrics.update_active_pod_creations(len(self._active_creations))
153151

154152
async def _get_entrypoint_script(self) -> str:
155153
"""Get entrypoint script content"""
@@ -257,7 +255,7 @@ async def ensure_namespace_security(self) -> None:
257255
258256
Creates:
259257
- Default-deny NetworkPolicy for executor pods (blocks lateral movement and exfiltration)
260-
- ResourceQuota to cap aggregate pod/resource consumption
258+
- ResourceQuota to cap aggregate CPU/memory consumption (no pod count limit)
261259
- Pod Security Admission labels (Restricted profile)
262260
"""
263261
namespace = self._settings.K8S_NAMESPACE
@@ -293,9 +291,8 @@ async def _ensure_executor_network_policy(self, namespace: str) -> None:
293291
self.logger.info(f"NetworkPolicy '{policy_name}' applied in namespace {namespace}")
294292

295293
async def _ensure_executor_resource_quota(self, namespace: str) -> None:
296-
"""Create or update ResourceQuota to cap aggregate executor pod consumption."""
294+
"""Create or update ResourceQuota to cap aggregate CPU/memory in the executor namespace."""
297295
quota_name = "executor-quota"
298-
n = self._settings.K8S_MAX_CONCURRENT_PODS
299296

300297
quota = k8s_client.V1ResourceQuota(
301298
api_version="v1",
@@ -307,11 +304,10 @@ async def _ensure_executor_resource_quota(self, namespace: str) -> None:
307304
),
308305
spec=k8s_client.V1ResourceQuotaSpec(
309306
hard={
310-
"pods": str(n),
311-
"requests.cpu": f"{int(self._settings.K8S_POD_CPU_REQUEST.removesuffix('m')) * n}m",
312-
"requests.memory": f"{int(self._settings.K8S_POD_MEMORY_REQUEST.removesuffix('Mi')) * n}Mi",
313-
"limits.cpu": f"{int(self._settings.K8S_POD_CPU_LIMIT.removesuffix('m')) * n}m",
314-
"limits.memory": f"{int(self._settings.K8S_POD_MEMORY_LIMIT.removesuffix('Mi')) * n}Mi",
307+
"requests.cpu": self._settings.K8S_QUOTA_CPU,
308+
"requests.memory": self._settings.K8S_QUOTA_MEMORY,
309+
"limits.cpu": self._settings.K8S_QUOTA_CPU,
310+
"limits.memory": self._settings.K8S_QUOTA_MEMORY,
315311
},
316312
),
317313
)

backend/app/services/runtime_settings.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,5 @@ def _build_toml_defaults(self) -> SystemSettings:
5151
max_timeout_seconds=s.K8S_POD_EXECUTION_TIMEOUT,
5252
memory_limit=s.K8S_POD_MEMORY_LIMIT,
5353
cpu_limit=s.K8S_POD_CPU_LIMIT,
54-
max_concurrent_executions=s.K8S_MAX_CONCURRENT_PODS,
5554
session_timeout_minutes=s.ACCESS_TOKEN_EXPIRE_MINUTES,
5655
)

backend/app/settings.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ def __init__(
6868
# Kubernetes namespace for execution pods
6969
K8S_NAMESPACE: str = "integr8scode"
7070

71-
# Maximum concurrent pod creations allowed by k8s worker
72-
K8S_MAX_CONCURRENT_PODS: int = 10
73-
7471
# Settings for Kubernetes resource limits and requests
7572
K8S_POD_CPU_LIMIT: str = "1000m"
7673
K8S_POD_MEMORY_LIMIT: str = "128Mi"
@@ -80,6 +77,10 @@ def __init__(
8077
K8S_POD_PRIORITY_CLASS_NAME: str | None = None
8178
K8S_POD_RUNTIME_CLASS_NAME: str | None = None # e.g. "gvisor" for sandboxed execution
8279

80+
# Namespace-level ResourceQuota caps (total budget, not per-pod)
81+
K8S_QUOTA_CPU: str = "10000m"
82+
K8S_QUOTA_MEMORY: str = "1280Mi"
83+
8384
SUPPORTED_RUNTIMES: dict[str, LanguageInfoDomain] = Field(default_factory=lambda: RUNTIME_MATRIX)
8485

8586
EXAMPLE_SCRIPTS: dict[str, str] = Field(default_factory=lambda: EXEC_EXAMPLE_SCRIPTS)

backend/tests/e2e/test_admin_settings_routes.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,6 @@ async def test_reset_system_settings(
172172
max_timeout_seconds=test_settings.K8S_POD_EXECUTION_TIMEOUT,
173173
memory_limit=test_settings.K8S_POD_MEMORY_LIMIT,
174174
cpu_limit=test_settings.K8S_POD_CPU_LIMIT,
175-
max_concurrent_executions=test_settings.K8S_MAX_CONCURRENT_PODS,
176175
session_timeout_minutes=test_settings.ACCESS_TOKEN_EXPIRE_MINUTES,
177176
)
178177
assert settings == expected

backend/tests/unit/services/test_runtime_settings.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ def _make_settings() -> Settings:
1919
"K8S_POD_EXECUTION_TIMEOUT": 30,
2020
"K8S_POD_MEMORY_LIMIT": "128Mi",
2121
"K8S_POD_CPU_LIMIT": "1000m",
22-
"K8S_MAX_CONCURRENT_PODS": 5,
2322
"ACCESS_TOKEN_EXPIRE_MINUTES": 60,
2423
})
2524

@@ -58,7 +57,7 @@ async def test_passes_toml_defaults_to_repo() -> None:
5857
assert defaults.max_timeout_seconds == 30
5958
assert defaults.memory_limit == "128Mi"
6059
assert defaults.cpu_limit == "1000m"
61-
assert defaults.max_concurrent_executions == 5
60+
assert defaults.max_concurrent_executions == 10
6261
assert defaults.session_timeout_minutes == 60
6362

6463

docs/security/policies.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,17 @@ This policy matches pods with the `component: executor` label, which the pod bui
6262

6363
### Resource Quota
6464

65-
A ResourceQuota caps aggregate resource consumption in the executor namespace:
65+
A ResourceQuota caps aggregate CPU and memory in the executor namespace. If a new pod's requests would exceed
66+
the remaining quota, the API server rejects the creation at admission time (it does not queue the pod as Pending); the worker catches this error and reports the execution as failed.
6667

67-
| Resource | Limit | Derivation |
68-
|-------------------|-----------------------------------|----------------------------------|
69-
| `pods` | `K8S_MAX_CONCURRENT_PODS` | Maximum concurrent executor pods |
70-
| `requests.cpu` | `K8S_MAX_CONCURRENT_PODS` cores | 1 core per pod |
71-
| `requests.memory` | `K8S_MAX_CONCURRENT_PODS × 128Mi` | 128Mi per pod |
72-
| `limits.cpu` | Same as requests | Prevents burst beyond quota |
73-
| `limits.memory` | Same as requests | Prevents OOM beyond quota |
68+
| Resource | Limit | Source setting |
69+
|-------------------|--------------------|--------------------|
70+
| `requests.cpu` | `K8S_QUOTA_CPU` | Namespace CPU cap |
71+
| `requests.memory` | `K8S_QUOTA_MEMORY` | Namespace memory cap |
72+
| `limits.cpu` | `K8S_QUOTA_CPU` | Same as requests |
73+
| `limits.memory` | `K8S_QUOTA_MEMORY` | Same as requests |
7474

75-
This prevents a burst of executions from starving other workloads in the cluster.
75+
No `pods` count in the quota — concurrency is controlled by the execution queue (`max_concurrent_executions`).
7676

7777
### Pod Security Admission (PSA)
7878

0 commit comments

Comments
 (0)