@@ -625,39 +625,36 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) {
625625 func_term ,
626626 args_term );
627627
628- uint32_t response_len = 0 ;
629- ssize_t n ;
630628 char * response_data = NULL ;
629+ uint32_t response_len = 0 ;
630+ int read_result ;
631631
632632 Py_BEGIN_ALLOW_THREADS
633633 enif_send (NULL , & tl_current_worker -> callback_handler , msg_env , msg );
634634 enif_free_env (msg_env );
635- n = read (tl_current_worker -> callback_pipe [0 ], & response_len , sizeof (response_len ));
635+ /* Use 30 second timeout to prevent indefinite blocking */
636+ read_result = read_length_prefixed_data (
637+ tl_current_worker -> callback_pipe [0 ],
638+ & response_data , & response_len , 30000 );
636639 Py_END_ALLOW_THREADS
637640
638- if (n != sizeof (response_len )) {
639- PyErr_SetString (PyExc_RuntimeError , "Failed to read callback response length" );
641+ if (read_result == -1 ) {
642+ if (errno == ETIMEDOUT ) {
643+ PyErr_SetString (PyExc_TimeoutError , "Callback response timed out" );
644+ } else {
645+ PyErr_SetString (PyExc_RuntimeError , "Failed to read callback response" );
646+ }
640647 return NULL ;
641648 }
642-
643- response_data = enif_alloc (response_len );
644- if (response_data == NULL ) {
649+ if (read_result == -2 ) {
645650 PyErr_SetString (PyExc_MemoryError , "Failed to allocate response buffer" );
646651 return NULL ;
647652 }
648653
649- Py_BEGIN_ALLOW_THREADS
650- n = read (tl_current_worker -> callback_pipe [0 ], response_data , response_len );
651- Py_END_ALLOW_THREADS
652-
653- if (n != (ssize_t )response_len ) {
654+ PyObject * result = parse_callback_response ((unsigned char * )response_data , response_len );
655+ if (response_data != NULL ) {
654656 enif_free (response_data );
655- PyErr_SetString (PyExc_RuntimeError , "Failed to read callback response data" );
656- return NULL ;
657657 }
658-
659- PyObject * result = parse_callback_response ((unsigned char * )response_data , response_len );
660- enif_free (response_data );
661658 return result ;
662659 }
663660
@@ -723,19 +720,19 @@ extern bool g_has_thread_coordinator;
723720static int g_async_callback_pipe [2 ] = {-1 , -1 }; /* [0]=read, [1]=write */
724721static PyObject * g_async_pending_futures = NULL ; /* Dict: callback_id -> Future */
725722static pthread_mutex_t g_async_futures_mutex = PTHREAD_MUTEX_INITIALIZER ;
726- static bool g_async_callback_initialized = false;
723+
724+ /* Thread-safe initialization using pthread_once */
725+ static pthread_once_t g_async_callback_init_once = PTHREAD_ONCE_INIT ;
726+ static int g_async_callback_init_result = 0 ;
727727
728728/**
729- * Initialize async callback system .
730- * Creates the response pipe and pending futures dict .
729+ * Internal initialization function called by pthread_once .
730+ * Thread-safe: only called once by pthread_once .
731731 */
732- static int async_callback_init (void ) {
733- if (g_async_callback_initialized ) {
734- return 0 ;
735- }
736-
732+ static void async_callback_init_impl (void ) {
737733 if (pipe (g_async_callback_pipe ) < 0 ) {
738- return -1 ;
734+ g_async_callback_init_result = -1 ;
735+ return ;
739736 }
740737
741738 /* Set the read end to non-blocking for asyncio compatibility */
@@ -750,11 +747,21 @@ static int async_callback_init(void) {
750747 close (g_async_callback_pipe [1 ]);
751748 g_async_callback_pipe [0 ] = -1 ;
752749 g_async_callback_pipe [1 ] = -1 ;
753- return -1 ;
750+ g_async_callback_init_result = -1 ;
751+ return ;
754752 }
755753
756- g_async_callback_initialized = true;
757- return 0 ;
754+ g_async_callback_init_result = 0 ;
755+ }
756+
757+ /**
758+ * Initialize the async callback system; safe to call repeatedly and from any thread.
759+ * pthread_once runs async_callback_init_impl at most once, so a failed first run is not retried.
760+ * Returns the result recorded by that single run: 0 on success, -1 on failure.
761+ */
762+ static int async_callback_init (void ) {
763+ pthread_once (& g_async_callback_init_once , async_callback_init_impl );
764+ return g_async_callback_init_result ;
758765}
759766
760767/**
@@ -893,11 +900,10 @@ static PyObject *get_async_callback_fd(PyObject *self, PyObject *args) {
893900 (void )self ;
894901 (void )args ;
895902
896- if (!g_async_callback_initialized ) {
897- if (async_callback_init () < 0 ) {
898- PyErr_SetString (PyExc_RuntimeError , "Failed to initialize async callback system" );
899- return NULL ;
900- }
903+ /* async_callback_init uses pthread_once, so it's safe to call multiple times */
904+ if (async_callback_init () < 0 ) {
905+ PyErr_SetString (PyExc_RuntimeError , "Failed to initialize async callback system" );
906+ return NULL ;
901907 }
902908
903909 return PyLong_FromLong (g_async_callback_pipe [0 ]);
@@ -1328,40 +1334,75 @@ static void *async_event_loop_thread(void *arg) {
13281334 PyErr_Clear ();
13291335 }
13301336
1331- /* Check for completed futures (GIL held) */
1337+ /*
1338+ * Check for completed futures (GIL held).
1339+ *
1340+ * IMPORTANT: We must not hold the mutex while calling Python functions
1341+ * to avoid deadlocks. The pattern is:
1342+ * 1. Lock mutex, collect completed items, unlock
1343+ * 2. Process callbacks outside mutex (no contention)
1344+ * 3. Lock mutex, remove processed items, unlock
1345+ */
1346+
1347+ /* Phase 1: Collect completed futures under mutex */
1348+ #define MAX_COMPLETED_BATCH 16
1349+ async_pending_t * completed [MAX_COMPLETED_BATCH ];
1350+ int num_completed = 0 ;
1351+
13321352 pthread_mutex_lock (& worker -> queue_mutex );
1333- async_pending_t * prev = NULL ;
13341353 async_pending_t * p = worker -> pending_head ;
1335- while (p != NULL ) {
1354+ while (p != NULL && num_completed < MAX_COMPLETED_BATCH ) {
13361355 if (p -> future != NULL ) {
1356+ /* Quick check if future is done (still needs GIL, but mutex held briefly) */
13371357 PyObject * done = PyObject_CallMethod (p -> future , "done" , NULL );
13381358 if (done != NULL && PyObject_IsTrue (done )) {
13391359 Py_DECREF (done );
1360+ completed [num_completed ++ ] = p ;
1361+ } else {
1362+ Py_XDECREF (done );
1363+ }
1364+ }
1365+ p = p -> next ;
1366+ }
1367+ pthread_mutex_unlock (& worker -> queue_mutex );
13401368
1341- /* Future is complete - process it */
1342- async_future_callback (worker , p );
1369+ /* Phase 2: Process completed callbacks outside mutex (no deadlock risk) */
1370+ for (int i = 0 ; i < num_completed ; i ++ ) {
1371+ async_future_callback (worker , completed [i ]);
1372+ }
13431373
1344- /* Remove from list */
1345- Py_DECREF (p -> future );
1346- if (prev == NULL ) {
1347- worker -> pending_head = p -> next ;
1348- } else {
1349- prev -> next = p -> next ;
1350- }
1351- if (p == worker -> pending_tail ) {
1352- worker -> pending_tail = prev ;
1374+ /* Phase 3: Remove processed items under mutex */
1375+ if (num_completed > 0 ) {
1376+ pthread_mutex_lock (& worker -> queue_mutex );
1377+ for (int i = 0 ; i < num_completed ; i ++ ) {
1378+ async_pending_t * to_remove = completed [i ];
1379+
1380+ /* Find and remove from list */
1381+ async_pending_t * prev = NULL ;
1382+ p = worker -> pending_head ;
1383+ while (p != NULL ) {
1384+ if (p == to_remove ) {
1385+ /* Remove from list */
1386+ if (prev == NULL ) {
1387+ worker -> pending_head = p -> next ;
1388+ } else {
1389+ prev -> next = p -> next ;
1390+ }
1391+ if (p == worker -> pending_tail ) {
1392+ worker -> pending_tail = prev ;
1393+ }
1394+ break ;
13531395 }
1354- async_pending_t * to_free = p ;
1396+ prev = p ;
13551397 p = p -> next ;
1356- enif_free (to_free );
1357- continue ;
13581398 }
1359- Py_XDECREF (done );
1399+
1400+ /* Clean up */
1401+ Py_DECREF (to_remove -> future );
1402+ enif_free (to_remove );
13601403 }
1361- prev = p ;
1362- p = p -> next ;
1404+ pthread_mutex_unlock (& worker -> queue_mutex );
13631405 }
1364- pthread_mutex_unlock (& worker -> queue_mutex );
13651406 }
13661407
13671408 /* Stop and close the event loop */
0 commit comments