@@ -348,8 +348,7 @@ public void close() {
348348
349349 /**
350350 * The last error that occurred during session creation. This is stored temporarily and cleared
351- * when a session is successfully created. Unlike the previous implementation, this error is not
352- * cached forever - subsequent requests will retry session creation.
351+ * when a session is successfully created.
353352 */
354353 @ VisibleForTesting final AtomicReference <Throwable > lastCreationError = new AtomicReference <>();
355354
@@ -385,6 +384,7 @@ public void close() {
385384 this .multiplexedSessionReference = new AtomicReference <>(null );
386385 // Mark creation as in progress for the initial attempt
387386 this .creationInProgress .set (true );
387+ final CountDownLatch initialCreationLatch = this .creationLatch ;
388388 this .sessionClient .asyncCreateMultiplexedSession (
389389 new SessionConsumer () {
390390 @ Override
@@ -420,7 +420,7 @@ public void onSessionCreateFailure(Throwable t, int createFailureForSessionCount
420420 }
421421 });
422422 maybeWaitForInitialSessionCreation (
423- sessionClient .getSpanner ().getOptions ().getSessionPoolOptions ());
423+ sessionClient .getSpanner ().getOptions ().getSessionPoolOptions (), initialCreationLatch );
424424 }
425425
426426 void setPool (SessionPool pool ) {
@@ -452,7 +452,21 @@ private void onSessionCreatedSuccessfully(SessionImpl session) {
452452
453453 /**
454454 * Called when multiplexed session creation fails. This method stores the error temporarily,
455- * notifies waiting threads, and starts the maintainer for retry (unless UNIMPLEMENTED).
455+ * notifies waiting threads, and starts the maintainer for retry (unless it's a permanent error).
456+ *
457+ * <p>Permanent errors that should NOT be retried:
458+ *
459+ * <ul>
460+ * <li>UNIMPLEMENTED - multiplexed sessions are not supported
461+ * <li>DatabaseNotFoundException - the database doesn't exist
462+ * <li>InstanceNotFoundException - the instance doesn't exist
463+ * </ul>
464+ *
465+ * <p>Note: We do NOT set {@link #resourceNotFoundException} here because that field is used by
466+ * {@link #isValid()} to determine if the client should be recreated. Setting it during session
467+ * creation would cause {@link SpannerImpl#getDatabaseClient} to create a new client and retry,
468+ * which we don't want for permanent errors. Instead, we check if the error is a
469+ * ResourceNotFoundException when deciding whether to start the maintainer.
456470 */
457471 private void onSessionCreationFailed (Throwable t ) {
458472 creationLock .lock ();
@@ -465,25 +479,38 @@ private void onSessionCreationFailed(Throwable t) {
465479 // Notify all waiting threads
466480 creationLatch .countDown ();
467481 creationLatch = new CountDownLatch (1 );
468- // Start the maintainer even on failure (except for UNIMPLEMENTED) so it can retry
469- if (!unimplemented .get () && !maintainer .isStarted ()) {
482+ // Start the maintainer even on failure so it can retry, but NOT for permanent errors:
483+ // - UNIMPLEMENTED: multiplexed sessions are not supported
484+ // - ResourceNotFoundException: database or instance doesn't exist
485+ if (!unimplemented .get () && !isResourceNotFoundException (t ) && !maintainer .isStarted ()) {
470486 maintainer .start ();
471487 }
472488 } finally {
473489 creationLock .unlock ();
474490 }
475491 }
476492
493+ /**
494+ * Checks if the throwable is a {@link DatabaseNotFoundException} or {@link
495+ * InstanceNotFoundException}.
496+ */
497+ private boolean isResourceNotFoundException (Throwable t ) {
498+ SpannerException spannerException = SpannerExceptionFactory .asSpannerException (t );
499+ return spannerException instanceof DatabaseNotFoundException
500+ || spannerException instanceof InstanceNotFoundException ;
501+ }
502+
477503 /**
478504 * Waits for the initial session creation to complete if configured to do so. This method handles
479505 * the case where the session creation is still in progress or has failed.
480506 */
481- private void maybeWaitForInitialSessionCreation (SessionPoolOptions sessionPoolOptions ) {
507+ private void maybeWaitForInitialSessionCreation (
508+ SessionPoolOptions sessionPoolOptions , CountDownLatch latchToWaitOn ) {
482509 Duration waitDuration = sessionPoolOptions .getWaitForMinSessions ();
483510 if (waitDuration != null && !waitDuration .isZero ()) {
484511 long timeoutMillis = waitDuration .toMillis ();
485512 try {
486- if (!creationLatch .await (timeoutMillis , TimeUnit .MILLISECONDS )) {
513+ if (!latchToWaitOn .await (timeoutMillis , TimeUnit .MILLISECONDS )) {
487514 throw SpannerExceptionFactory .newSpannerException (
488515 ErrorCode .DEADLINE_EXCEEDED ,
489516 "Timed out after waiting " + timeoutMillis + "ms for multiplexed session creation" );
@@ -504,6 +531,10 @@ private void maybeWaitForInitialSessionCreation(SessionPoolOptions sessionPoolOp
504531 * semantics: if no session exists and creation is not in progress, it triggers a new creation
505532 * attempt. If creation is in progress, it waits for the result.
506533 *
534+ * <p>This method uses a blocking lock (not tryLock) to ensure that the creationLatch is always
535+ * read while holding the lock, avoiding a race condition where a waiting thread could read an old
536+ * latch that has already been counted down and replaced.
537+ *
507538 * @return the session reference
508539 * @throws SpannerException if session creation fails
509540 */
@@ -530,91 +561,66 @@ SessionReference getOrCreateSessionReference() {
530561 ErrorCode .UNIMPLEMENTED , "Multiplexed sessions are not supported" );
531562 }
532563
533- // Try to acquire the lock for creation
534- if (creationLock .tryLock ()) {
535- try {
536- // Double-check after acquiring lock
537- sessionFuture = multiplexedSessionReference .get ();
538- if (sessionFuture != null ) {
539- try {
540- return sessionFuture .get ();
541- } catch (ExecutionException | InterruptedException e ) {
542- throw SpannerExceptionFactory .asSpannerException (
543- e .getCause () != null ? e .getCause () : e );
544- }
545- }
546-
547- // Check if creation is already in progress
548- if (creationInProgress .get ()) {
549- // Wait for the ongoing creation to complete
550- creationLock .unlock ();
551- return waitForSessionCreation ();
552- }
553-
554- // Start a new creation attempt
555- creationInProgress .set (true );
556- CountDownLatch currentLatch = creationLatch ;
557- creationLock .unlock ();
558-
559- // Trigger async session creation
560- sessionClient .asyncCreateMultiplexedSession (
561- new SessionConsumer () {
562- @ Override
563- public void onSessionReady (SessionImpl session ) {
564- onSessionCreatedSuccessfully (session );
565- }
566-
567- @ Override
568- public void onSessionCreateFailure (Throwable t , int createFailureForSessionCount ) {
569- onSessionCreationFailed (t );
570- }
571- });
564+ // Check if resource not found (database or instance) - don't retry in this case
565+ Throwable lastError = lastCreationError .get ();
566+ if (lastError != null && isResourceNotFoundException (lastError )) {
567+ throw SpannerExceptionFactory .asSpannerException (lastError );
568+ }
572569
573- // Wait for creation to complete
570+ // Use blocking lock to avoid race condition with latch replacement.
571+ // The latch must be read while holding the lock to ensure we wait on the correct latch.
572+ CountDownLatch latchToWaitOn ;
573+ boolean amTheCreator = false ;
574+ creationLock .lock ();
575+ try {
576+ // Re-check state after acquiring lock
577+ sessionFuture = multiplexedSessionReference .get ();
578+ if (sessionFuture != null ) {
574579 try {
575- currentLatch . await ();
576- } catch (InterruptedException e ) {
577- throw SpannerExceptionFactory .propagateInterrupt ( e );
580+ return sessionFuture . get ();
581+ } catch (ExecutionException | InterruptedException e ) {
582+ throw SpannerExceptionFactory .asSpannerException ( e . getCause () != null ? e . getCause () : e );
578583 }
584+ }
579585
580- // Check result
581- sessionFuture = multiplexedSessionReference .get ();
582- if (sessionFuture != null ) {
583- try {
584- return sessionFuture .get ();
585- } catch (ExecutionException | InterruptedException e ) {
586- throw SpannerExceptionFactory .asSpannerException (
587- e .getCause () != null ? e .getCause () : e );
588- }
589- }
586+ // Capture the current latch while holding the lock
587+ latchToWaitOn = creationLatch ;
590588
591- // Creation failed
592- Throwable error = lastCreationError .get ();
593- if (error != null ) {
594- throw SpannerExceptionFactory .asSpannerException (error );
595- }
596- throw SpannerExceptionFactory .newSpannerException (
597- ErrorCode .INTERNAL , "Failed to create multiplexed session" );
598- } finally {
599- if (creationLock .isHeldByCurrentThread ()) {
600- creationLock .unlock ();
601- }
589+ if (!creationInProgress .get ()) {
590+ // We are the creator
591+ amTheCreator = true ;
592+ creationInProgress .set (true );
602593 }
603- } else {
604- // Another thread is creating, wait for it
605- return waitForSessionCreation ();
594+ // If creationInProgress is true, we are a waiter
595+ } finally {
596+ creationLock . unlock ();
606597 }
607- }
608598
609- /** Waits for an ongoing session creation to complete and returns the result. */
610- private SessionReference waitForSessionCreation () {
599+ if (amTheCreator ) {
600+ // Trigger async session creation
601+ sessionClient .asyncCreateMultiplexedSession (
602+ new SessionConsumer () {
603+ @ Override
604+ public void onSessionReady (SessionImpl session ) {
605+ onSessionCreatedSuccessfully (session );
606+ }
607+
608+ @ Override
609+ public void onSessionCreateFailure (Throwable t , int createFailureForSessionCount ) {
610+ onSessionCreationFailed (t );
611+ }
612+ });
613+ }
614+
615+ // Wait for creation to complete (both creator and waiters wait on the same latch)
611616 try {
612- creationLatch .await ();
617+ latchToWaitOn .await ();
613618 } catch (InterruptedException e ) {
614619 throw SpannerExceptionFactory .propagateInterrupt (e );
615620 }
616621
617- ApiFuture <SessionReference > sessionFuture = multiplexedSessionReference .get ();
622+ // Check result
623+ sessionFuture = multiplexedSessionReference .get ();
618624 if (sessionFuture != null ) {
619625 try {
620626 return sessionFuture .get ();
0 commit comments