1+ import ast
12import asyncio
2- import json
33import os
44from datetime import datetime , timedelta , timezone
55from pathlib import Path
@@ -97,7 +97,11 @@ async def graceful_shutdown(self) -> None:
9797 logger .warning ("Shutdown timeout reached, forcing pod termination" )
9898 break
9999 try :
100- await self ._cleanup_pod_resources (pod_name )
100+ if not pod_name .startswith ("execution-" ):
101+ return
102+ execution_id = pod_name [len ("execution-" ):]
103+ config_map_name = f"script-{ execution_id } "
104+ await self ._cleanup_resources (pod_name , config_map_name )
101105 except Exception as e :
102106 logger .error (f"Error during pod cleanup on shutdown: { str (e )} " )
103107
@@ -156,18 +160,18 @@ async def create_execution_pod(
156160 pod_name = f"execution-{ execution_id } "
157161
158162 try :
159- entrypoint_script_path = Path ("app/scripts/entrypoint.py " )
163+ entrypoint_script_path = Path ("app/scripts/entrypoint.sh " )
160164 entrypoint_code = await asyncio .to_thread (entrypoint_script_path .read_text )
161165
162- config_map_data ["entrypoint.py " ] = entrypoint_code
166+ config_map_data ["entrypoint.sh " ] = entrypoint_code
163167
164168 config_map_body = k8s_client .V1ConfigMap (
165169 metadata = k8s_client .V1ObjectMeta (name = config_map_name ),
166170 data = config_map_data
167171 )
168172 await self ._create_config_map (config_map_body )
169173
170- final_pod_command = ["/scripts/entrypoint.py" ] + command
174+ final_pod_command = ["/bin/sh" , "/ scripts/entrypoint.sh" , * command ]
171175
172176 builder = PodManifestBuilder (
173177 execution_id = execution_id ,
@@ -193,44 +197,34 @@ async def create_execution_pod(
193197 await self ._cleanup_resources (pod_name , config_map_name )
194198 raise KubernetesPodError (f"Failed to create execution pod: { str (e )} " ) from e
195199
196- async def get_pod_logs (self , execution_id : str ) -> tuple [str , str , dict ]:
197- # This method reverts to the simple version that parses the clean log output
200+ async def get_pod_logs (self , execution_id : str ) -> tuple [dict , str ]:
198201 pod_name = f"execution-{ execution_id } "
199202 config_map_name = f"script-{ execution_id } "
200-
201203 try :
202204 pod = await self ._wait_for_pod_completion (pod_name )
203205 pod_phase = pod .status .phase if pod and pod .status else "Unknown"
204206 full_logs = await self ._get_container_logs (pod_name , "script-runner" )
207+ logger .info (f"Raw logs from pod { pod_name } :\n ---\n { full_logs } \n ---" )
205208
206- # The simple, reliable parser for the ###METRICS### block
207- output , metrics = self ._extract_execution_metrics (full_logs )
208-
209- final_exit_code = metrics .get ("exit_code" , 1 )
210- metrics ["pod_phase" ] = pod_phase
211- metrics ["status" ] = "completed" if final_exit_code == 0 else "error"
212-
213- return output , pod_phase , metrics
209+ try :
210+ # https://stackoverflow.com/questions/15197673/using-pythons-eval-vs-ast-literal-eval
211+ metrics = ast .literal_eval (full_logs )
212+ return metrics , pod_phase
213+ except (ValueError , SyntaxError , TypeError ) as e :
214+ logger .error (f"FAILED TO PARSE LOGS FROM POD { pod_name } as a Python literal: { e } " )
215+ error_payload = {
216+ "exit_code" : - 1 ,
217+ "stdout" : "" ,
218+ "stderr" : f"Internal execution error: Pod logs were not valid JSON. "
219+ f"Pod phase: { pod_phase } .\n Raw Logs:\n { full_logs } " ,
220+ "resource_usage" : None ,
221+ }
222+ return error_payload , pod_phase
214223 finally :
215224 logger .info (f"Initiating cleanup for execution '{ execution_id } '..." )
216225 await self ._cleanup_resources (pod_name , config_map_name )
217226 self ._active_pods .pop (execution_id , None )
218227
219- def _extract_execution_metrics (self , logs : str ) -> tuple [str , dict ]:
220- # This is the simple parser for the entrypoint.py output
221- split_marker = "\n ###METRICS###\n "
222- if split_marker in logs :
223- output , metrics_json = logs .rsplit (split_marker , 1 )
224- try :
225- metrics_data = json .loads (metrics_json )
226- return output .strip (), metrics_data
227- except json .JSONDecodeError :
228- logger .error (f"Failed to decode metrics JSON: { metrics_json } " )
229- return logs .strip (), {"error" : "Failed to decode metrics JSON." , "exit_code" : 1 }
230-
231- logger .warning ("Metrics marker not found in logs." )
232- return logs .strip (), {"error" : "Metrics marker not found in logs." , "exit_code" : 1 }
233-
234228 async def _wait_for_pod_completion (self , pod_name : str ) -> k8s_client .V1Pod :
235229 logger .info (f"Waiting for pod '{ pod_name } ' to complete..." )
236230 for _ in range (self .POD_RETRY_ATTEMPTS ):
@@ -292,13 +286,6 @@ async def _cleanup_resources(self, pod_name: str, config_map_name: str) -> None:
292286 except ApiException as e :
293287 logger .error (f"Failed to delete config map '{ config_map_name } ': { e .reason } " )
294288
295- async def _cleanup_pod_resources (self , pod_name : str ) -> None :
296- if not pod_name .startswith ("execution-" ):
297- return
298- execution_id = pod_name [len ("execution-" ):]
299- config_map_name = f"script-{ execution_id } "
300- await self ._cleanup_resources (pod_name , config_map_name )
301-
302289
303290def get_k8s_manager (request : Request ) -> KubernetesServiceManager :
304291 if not hasattr (request .app .state , "k8s_manager" ):
0 commit comments