@@ -348,8 +348,7 @@ public void close() {
348348
349349 /**
350350 * The last error that occurred during session creation. This is stored temporarily and cleared
351- * when a session is successfully created. Unlike the previous implementation, this error is not
352- * cached forever - subsequent requests will retry session creation.
351+ * when a session is successfully created.
353352 */
354353 @ VisibleForTesting final AtomicReference <Throwable > lastCreationError = new AtomicReference <>();
355354
@@ -452,7 +451,21 @@ private void onSessionCreatedSuccessfully(SessionImpl session) {
452451
453452 /**
454453 * Called when multiplexed session creation fails. This method stores the error temporarily,
455- * notifies waiting threads, and starts the maintainer for retry (unless UNIMPLEMENTED).
454+ * notifies waiting threads, and starts the maintainer for retry (unless it's a permanent error).
455+ *
456+ * <p>Permanent errors that should NOT be retried:
457+ *
458+ * <ul>
459+ * <li>UNIMPLEMENTED - multiplexed sessions are not supported
460+ * <li>DatabaseNotFoundException - the database doesn't exist
461+ * <li>InstanceNotFoundException - the instance doesn't exist
462+ * </ul>
463+ *
464+ * <p>Note: We do NOT set {@link #resourceNotFoundException} here because that field is used by
465+ * {@link #isValid()} to determine if the client should be recreated. Setting it during session
466+ * creation would cause {@link SpannerImpl#getDatabaseClient} to create a new client and retry,
467+ * which we don't want for permanent errors. Instead, we check if the error is a
468+ * ResourceNotFoundException when deciding whether to start the maintainer.
456469 */
457470 private void onSessionCreationFailed (Throwable t ) {
458471 creationLock .lock ();
@@ -465,15 +478,27 @@ private void onSessionCreationFailed(Throwable t) {
465478 // Notify all waiting threads
466479 creationLatch .countDown ();
467480 creationLatch = new CountDownLatch (1 );
468- // Start the maintainer even on failure (except for UNIMPLEMENTED) so it can retry
469- if (!unimplemented .get () && !maintainer .isStarted ()) {
481+ // Start the maintainer even on failure so it can retry, but NOT for permanent errors:
482+ // - UNIMPLEMENTED: multiplexed sessions are not supported
483+ // - ResourceNotFoundException: database or instance doesn't exist
484+ if (!unimplemented .get () && !isResourceNotFoundException (t ) && !maintainer .isStarted ()) {
470485 maintainer .start ();
471486 }
472487 } finally {
473488 creationLock .unlock ();
474489 }
475490 }
476491
492+ /**
493+ * Checks if the throwable is a {@link DatabaseNotFoundException} or {@link
494+ * InstanceNotFoundException}.
495+ */
496+ private boolean isResourceNotFoundException (Throwable t ) {
497+ SpannerException spannerException = SpannerExceptionFactory .asSpannerException (t );
498+ return spannerException instanceof DatabaseNotFoundException
499+ || spannerException instanceof InstanceNotFoundException ;
500+ }
501+
477502 /**
478503 * Waits for the initial session creation to complete if configured to do so. This method handles
479504 * the case where the session creation is still in progress or has failed.
@@ -504,6 +529,10 @@ private void maybeWaitForInitialSessionCreation(SessionPoolOptions sessionPoolOp
504529 * semantics: if no session exists and creation is not in progress, it triggers a new creation
505530 * attempt. If creation is in progress, it waits for the result.
506531 *
532+ * <p>This method uses a blocking lock (not tryLock) to ensure that the creationLatch is always
533+ * read while holding the lock, avoiding a race condition where a waiting thread could read an old
534+ * latch that has already been counted down and replaced.
535+ *
507536 * @return the session reference
508537 * @throws SpannerException if session creation fails
509538 */
@@ -530,91 +559,66 @@ SessionReference getOrCreateSessionReference() {
530559 ErrorCode .UNIMPLEMENTED , "Multiplexed sessions are not supported" );
531560 }
532561
533- // Try to acquire the lock for creation
534- if (creationLock .tryLock ()) {
535- try {
536- // Double-check after acquiring lock
537- sessionFuture = multiplexedSessionReference .get ();
538- if (sessionFuture != null ) {
539- try {
540- return sessionFuture .get ();
541- } catch (ExecutionException | InterruptedException e ) {
542- throw SpannerExceptionFactory .asSpannerException (
543- e .getCause () != null ? e .getCause () : e );
544- }
545- }
546-
547- // Check if creation is already in progress
548- if (creationInProgress .get ()) {
549- // Wait for the ongoing creation to complete
550- creationLock .unlock ();
551- return waitForSessionCreation ();
552- }
553-
554- // Start a new creation attempt
555- creationInProgress .set (true );
556- CountDownLatch currentLatch = creationLatch ;
557- creationLock .unlock ();
558-
559- // Trigger async session creation
560- sessionClient .asyncCreateMultiplexedSession (
561- new SessionConsumer () {
562- @ Override
563- public void onSessionReady (SessionImpl session ) {
564- onSessionCreatedSuccessfully (session );
565- }
566-
567- @ Override
568- public void onSessionCreateFailure (Throwable t , int createFailureForSessionCount ) {
569- onSessionCreationFailed (t );
570- }
571- });
562+ // Check if resource not found (database or instance) - don't retry in this case
563+ Throwable lastError = lastCreationError .get ();
564+ if (lastError != null && isResourceNotFoundException (lastError )) {
565+ throw SpannerExceptionFactory .asSpannerException (lastError );
566+ }
572567
573- // Wait for creation to complete
568+ // Use blocking lock to avoid race condition with latch replacement.
569+ // The latch must be read while holding the lock to ensure we wait on the correct latch.
570+ CountDownLatch latchToWaitOn ;
571+ boolean amTheCreator = false ;
572+ creationLock .lock ();
573+ try {
574+ // Re-check state after acquiring lock
575+ sessionFuture = multiplexedSessionReference .get ();
576+ if (sessionFuture != null ) {
574577 try {
575- currentLatch . await ();
576- } catch (InterruptedException e ) {
577- throw SpannerExceptionFactory .propagateInterrupt ( e );
578+ return sessionFuture . get ();
579+ } catch (ExecutionException | InterruptedException e ) {
580+ throw SpannerExceptionFactory .asSpannerException ( e . getCause () != null ? e . getCause () : e );
578581 }
582+ }
579583
580- // Check result
581- sessionFuture = multiplexedSessionReference .get ();
582- if (sessionFuture != null ) {
583- try {
584- return sessionFuture .get ();
585- } catch (ExecutionException | InterruptedException e ) {
586- throw SpannerExceptionFactory .asSpannerException (
587- e .getCause () != null ? e .getCause () : e );
588- }
589- }
584+ // Capture the current latch while holding the lock
585+ latchToWaitOn = creationLatch ;
590586
591- // Creation failed
592- Throwable error = lastCreationError .get ();
593- if (error != null ) {
594- throw SpannerExceptionFactory .asSpannerException (error );
595- }
596- throw SpannerExceptionFactory .newSpannerException (
597- ErrorCode .INTERNAL , "Failed to create multiplexed session" );
598- } finally {
599- if (creationLock .isHeldByCurrentThread ()) {
600- creationLock .unlock ();
601- }
587+ if (!creationInProgress .get ()) {
588+ // We are the creator
589+ amTheCreator = true ;
590+ creationInProgress .set (true );
602591 }
603- } else {
604- // Another thread is creating, wait for it
605- return waitForSessionCreation ();
592+ // If creationInProgress is true, we are a waiter
593+ } finally {
594+ creationLock . unlock ();
606595 }
607- }
608596
609- /** Waits for an ongoing session creation to complete and returns the result. */
610- private SessionReference waitForSessionCreation () {
597+ if (amTheCreator ) {
598+ // Trigger async session creation
599+ sessionClient .asyncCreateMultiplexedSession (
600+ new SessionConsumer () {
601+ @ Override
602+ public void onSessionReady (SessionImpl session ) {
603+ onSessionCreatedSuccessfully (session );
604+ }
605+
606+ @ Override
607+ public void onSessionCreateFailure (Throwable t , int createFailureForSessionCount ) {
608+ onSessionCreationFailed (t );
609+ }
610+ });
611+ }
612+
613+ // Wait for creation to complete (both creator and waiters wait on the same latch)
611614 try {
612- creationLatch .await ();
615+ latchToWaitOn .await ();
613616 } catch (InterruptedException e ) {
614617 throw SpannerExceptionFactory .propagateInterrupt (e );
615618 }
616619
617- ApiFuture <SessionReference > sessionFuture = multiplexedSessionReference .get ();
620+ // Check result
621+ sessionFuture = multiplexedSessionReference .get ();
618622 if (sessionFuture != null ) {
619623 try {
620624 return sessionFuture .get ();
0 commit comments