From 24d5a7733e5fb9306c1b8d3e0383e8d6020621fb Mon Sep 17 00:00:00 2001 From: Paul Medynski <31868385+paulmedynski@users.noreply.github.com> Date: Thu, 7 May 2026 10:46:14 -0300 Subject: [PATCH 1/5] Fix failover parser-state handling and add targeted regression tests --- .../Connection/SqlConnectionInternal.cs | 10 +- .../ConnectionFailoverTests.cs | 279 +++++++++++++++++- .../TDS.Servers/TransientTdsErrorTdsServer.cs | 2 +- .../TransientTdsErrorTdsServerArguments.cs | 7 + 4 files changed, 295 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs index e62541137c..fa012d0ba7 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs @@ -3635,7 +3635,15 @@ private void LoginWithFailover( continue; } - if (IsDoNotRetryConnectError(sqlex) || timeout.IsExpired) + // If state != closed, indicates that the parser encountered an error while + // processing the login response (e.g. an explicit error token). Transient + // network errors that impact connectivity will result in parser state being + // closed. Only network-level errors should trigger failover alternation; + // login-phase errors (like transient errors) should be thrown so they can + // be handled by the outer ConnectRetryCount loop. + if (_parser?.State is not TdsParserState.Closed || + IsDoNotRetryConnectError(sqlex) || + timeout.IsExpired) { // No more time to try again. // Caller will call LoginFailure() diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs index d3a0bcd07b..22789ccc8d 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs @@ -4,6 +4,7 @@ using System; using System.Data; +using System.Threading.Tasks; using Microsoft.Data.SqlClient.Connection; using Microsoft.Data.SqlClient.Tests.Common; using Microsoft.SqlServer.TDS.Servers; @@ -172,7 +173,7 @@ public void NetworkTimeout_ShouldFail() InitialCatalog = "master",// Required for failover partner to work ConnectTimeout = 1, ConnectRetryInterval = 1, - ConnectRetryCount = 0, // Disable retry + ConnectRetryCount = 0, // Disable retry Encrypt = false, MultiSubnetFailover = false, #if NETFRAMEWORK @@ -336,6 +337,10 @@ public void NetworkError_WithUserProvidedPartner_RetryEnabled_ShouldConnectToFai Assert.Equal(1, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount); } + /// + /// Verifies login-phase transient SQL errors are retried on the primary endpoint and + /// do not trigger failover-partner alternation. + /// [Theory] [InlineData(40613)] [InlineData(42108)] @@ -372,6 +377,8 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode) using SqlConnection connection = new(builder.ConnectionString); // Act + // First login receives the transient token; outer connect retry opens a fresh parser + // and retries against the same primary endpoint. connection.Open(); // Assert @@ -380,6 +387,8 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode) // Failures should prompt the client to return to the original server, resulting in a login count of 2 Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); + // The fix: login-phase errors must NOT trigger failover alternation + Assert.Equal(0, failoverServer.PreLoginCount); } [Theory] @@ -430,6 +439,10 @@ public void TransientFault_RetryDisabled_ShouldFail(uint errorCode) Assert.Fail(); } + /// + /// Verifies user-provided failover partner does not change behavior for login-phase + /// transient SQL errors; retries stay on primary. + /// [Theory] [InlineData(40613)] [InlineData(42108)] @@ -467,6 +480,8 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e using SqlConnection connection = new(builder.ConnectionString); // Act + // Even with a configured partner, this path should use outer connect retry + // against primary rather than alternation inside LoginWithFailover. connection.Open(); // Assert @@ -475,6 +490,8 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e // Failures should prompt the client to return to the original server, resulting in a login count of 2 Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); + // The fix: login-phase errors must NOT trigger failover alternation + Assert.Equal(0, failoverServer.PreLoginCount); } [Theory] @@ -591,5 +608,265 @@ public void TransientFault_IgnoreServerProvidedFailoverPartner_ShouldConnectToUs // 1 for the failover connection Assert.Equal(1, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount); } + + /// + /// Async parity for primary-only retry behavior on login-phase transient SQL errors. + /// + [Theory] + [InlineData(40613)] + [InlineData(42108)] + [InlineData(42109)] + public async Task TransientFault_Async_ShouldConnectToPrimary_NotFailover(uint errorCode) + { + // Async parity for TransientFault_ShouldConnectToPrimary. + // A transient login-token error must be retried against the primary; + // the failover partner must never be contacted. + + using TdsServer failoverServer = new( + new TdsServerArguments + { + FailoverPartner = "localhost,1234", + }); + failoverServer.Start(); + + using TransientTdsErrorTdsServer server = new( + new TransientTdsErrorTdsServerArguments() + { + IsEnabledTransientError = true, + Number = errorCode, + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + }); + server.Start(); + + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.EndPoint.Port}", + InitialCatalog = "master", + ConnectTimeout = 30, + ConnectRetryInterval = 1, + Encrypt = false, + Pooling = false, + }; + using SqlConnection connection = new(builder.ConnectionString); + + // Asserts async open follows the same retry and failover-selection rules as sync. + await connection.OpenAsync(); + + Assert.Equal(ConnectionState.Open, connection.State); + Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); + Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); + // The fix: login-phase errors must NOT trigger failover alternation + Assert.Equal(0, failoverServer.PreLoginCount); + } + + /// + /// Async parity with user-provided partner: login-phase transient SQL errors should + /// still retry on primary without failover alternation. + /// + [Theory] + [InlineData(40613)] + [InlineData(42108)] + [InlineData(42109)] + public async Task TransientFault_WithUserProvidedPartner_Async_ShouldConnectToPrimary_NotFailover(uint errorCode) + { + // Async parity for TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary. + // Even with a user-provided failover partner, a login-token error must not + // cause alternation to the failover server. + + using TdsServer failoverServer = new( + new TdsServerArguments + { + FailoverPartner = "localhost:1234", + }); + failoverServer.Start(); + + using TransientTdsErrorTdsServer server = new( + new TransientTdsErrorTdsServerArguments() + { + IsEnabledTransientError = true, + Number = errorCode, + FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + }); + server.Start(); + + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.EndPoint.Port}", + InitialCatalog = "master", + ConnectTimeout = 30, + ConnectRetryInterval = 1, + Encrypt = false, + FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + }; + using SqlConnection connection = new(builder.ConnectionString); + + // Asserts async open with explicit partner still avoids failover alternation. + await connection.OpenAsync(); + + Assert.Equal(ConnectionState.Open, connection.State); + Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); + Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); + // The fix: login-phase errors must NOT trigger failover alternation + Assert.Equal(0, failoverServer.PreLoginCount); + } + + /// + /// Verifies pooled connections are not cleared and failover is not attempted when a + /// login-phase transient SQL error occurs with a user-provided failover partner. + /// + [Theory] + [InlineData(40613)] + [InlineData(42108)] + [InlineData(42109)] + public void TransientFault_WithUserProvidedPartner_Pooling_ShouldNotClearPool_NotFailover(uint errorCode) + { + // With pooling enabled and a user-provided failover partner, a transient + // login-token error must not clear the pool and must not contact the failover server. + + using TdsServer failoverServer = new( + new TdsServerArguments + { + FailoverPartner = "localhost,1234", + }); + failoverServer.Start(); + + // Start with errors disabled so the pool warms up successfully. + using TransientTdsErrorTdsServer server = new( + new TransientTdsErrorTdsServerArguments() + { + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + }); + server.Start(); + + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.EndPoint.Port}", + InitialCatalog = "master", + ConnectTimeout = 30, + ConnectRetryInterval = 1, + Encrypt = SqlConnectionEncryptOption.Optional, + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + Pooling = true, + }; + + // Warm up the pool. + using SqlConnection warmup = new(builder.ConnectionString); + warmup.Open(); + warmup.Close(); + + // Enable the transient error for the next login attempt. + server.SetErrorBehavior(true, errorCode); + + // ConnectRetryCount > 0 (default 1) so the client retries and succeeds. + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + Assert.Equal(ConnectionState.Open, connection.State); + Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); + // Failover server must never have been contacted. + Assert.Equal(0, failoverServer.PreLoginCount); + } + + /// + /// Verifies ConnectRetryCount=0 propagates login-phase transient SQL errors immediately + /// and never attempts failover alternation. + /// + [Theory] + [InlineData(40613)] + [InlineData(42108)] + [InlineData(42109)] + public void TransientFault_RetryDisabled_WithUserProvidedPartner_ShouldFail_NotFailover(uint errorCode) + { + // When ConnectRetryCount = 0 and the server returns a login-phase token error, + // the exception must propagate immediately and the failover partner must not be + // contacted (parser state is not Closed, so the new guard must kick in). + + using TdsServer failoverServer = new( + new TdsServerArguments + { + FailoverPartner = "localhost:1234", + }); + failoverServer.Start(); + + using TransientTdsErrorTdsServer server = new( + new TransientTdsErrorTdsServerArguments() + { + IsEnabledTransientError = true, + Number = errorCode, + FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + }); + server.Start(); + + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.EndPoint.Port}", + InitialCatalog = "master", + ConnectTimeout = 30, + ConnectRetryInterval = 1, + ConnectRetryCount = 0, + Encrypt = false, + FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + }; + using SqlConnection connection = new(builder.ConnectionString); + + // No outer connect retry is allowed, so the first transient error should surface. + SqlException ex = Assert.Throws(() => connection.Open()); + + Assert.Equal((int)errorCode, ex.Number); + Assert.Equal(ConnectionState.Closed, connection.State); + // The parser was not closed (login-phase error), so the failover alternation branch + // must not have been entered. + Assert.Equal(0, failoverServer.PreLoginCount); + } + + /// + /// Isolates the parser-state guard by using a non-fatal login error token: without the + /// guard, LoginWithFailover alternates to partner; with the guard, retry stays on primary. + /// + [Fact] + public void NonFatalTransientLoginError_WithUserProvidedPartner_ShouldRetryPrimary_NotFailover() + { + // This test isolates the parser-state guard added to LoginWithFailover. + // We emit a transient login error with non-fatal severity so the connection + // is not automatically doomed/broken by existing breakConnection logic. + + using TdsServer failoverServer = new( + new TdsServerArguments + { + FailoverPartner = "localhost,1234", + }); + failoverServer.Start(); + + using TransientTdsErrorTdsServer server = new( + new TransientTdsErrorTdsServerArguments() + { + IsEnabledTransientError = true, + Number = 40613, + // Use non-fatal severity so break/doom logic does not short-circuit the path. + ErrorClass = 16, + RepeatCount = 1, + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + }); + server.Start(); + + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.EndPoint.Port}", + InitialCatalog = "master", + ConnectTimeout = 30, + ConnectRetryInterval = 1, + Encrypt = false, + Pooling = false, + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + }; + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + Assert.Equal(ConnectionState.Open, connection.State); + Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); + Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); + Assert.Equal(0, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount); + } } } diff --git a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServer.cs b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServer.cs index 6ec7ac2528..ba20fb4d02 100644 --- a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServer.cs +++ b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServer.cs @@ -89,7 +89,7 @@ private TDSMessageCollection GenerateErrorMessage(TDSMessage request) TDSUtilities.Log(Arguments.Log, "Request", request); // Prepare ERROR token with the denial details - TDSErrorToken errorToken = new TDSErrorToken(errorNumber, 1, 20, errorMessage); + TDSErrorToken errorToken = new TDSErrorToken(errorNumber, 1, Arguments.ErrorClass, errorMessage); // Log response TDSUtilities.Log(Arguments.Log, "Response", errorToken); diff --git a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs index 6a61d9cc6f..1be1277c9e 100644 --- a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs +++ b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs @@ -25,5 +25,12 @@ public class TransientTdsErrorTdsServerArguments : TdsServerArguments /// The number of times the transient error should be raised. /// public int RepeatCount { get; set; } = 1; + + /// + /// Error class (severity) to emit in ERROR token. + /// Fatal starts at 20 (TdsEnums.FATAL_ERROR_CLASS), so use values below 20 + /// to avoid automatic break/doom behavior in the client. + /// + public byte ErrorClass { get; set; } = 20; } } From 3920395ca7673668e1d924d72d0623e02c76bc0e Mon Sep 17 00:00:00 2001 From: Paul Medynski <31868385+paulmedynski@users.noreply.github.com> Date: Thu, 7 May 2026 11:00:10 -0300 Subject: [PATCH 2/5] Removed unnecessary commenet snippets. --- .../SimulatedServerTests/ConnectionFailoverTests.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs index 22789ccc8d..930348114d 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs @@ -387,7 +387,7 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode) // Failures should prompt the client to return to the original server, resulting in a login count of 2 Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); - // The fix: login-phase errors must NOT trigger failover alternation + // Login-phase errors must NOT trigger failover alternation Assert.Equal(0, failoverServer.PreLoginCount); } @@ -490,7 +490,7 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e // Failures should prompt the client to return to the original server, resulting in a login count of 2 Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); - // The fix: login-phase errors must NOT trigger failover alternation + // Login-phase errors must NOT trigger failover alternation Assert.Equal(0, failoverServer.PreLoginCount); } @@ -655,7 +655,7 @@ public async Task TransientFault_Async_ShouldConnectToPrimary_NotFailover(uint e Assert.Equal(ConnectionState.Open, connection.State); Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); - // The fix: login-phase errors must NOT trigger failover alternation + // Login-phase errors must NOT trigger failover alternation Assert.Equal(0, failoverServer.PreLoginCount); } @@ -706,7 +706,7 @@ public async Task TransientFault_WithUserProvidedPartner_Async_ShouldConnectToPr Assert.Equal(ConnectionState.Open, connection.State); Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); - // The fix: login-phase errors must NOT trigger failover alternation + // Login-phase errors must NOT trigger failover alternation Assert.Equal(0, failoverServer.PreLoginCount); } From 5d7d795231e7366d7bd37984ee5df9f0d64ac074 Mon Sep 17 00:00:00 2001 From: Paul Medynski <31868385+paulmedynski@users.noreply.github.com> Date: Thu, 7 May 2026 11:12:36 -0300 Subject: [PATCH 3/5] Strengthen failover pooling test assertions and clarify ErrorClass docs --- .../ConnectionFailoverTests.cs | 21 +++++++++++++++---- .../TransientTdsErrorTdsServerArguments.cs | 5 +++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs index 930348114d..dfa79a08d3 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs @@ -749,10 +749,9 @@ public void TransientFault_WithUserProvidedPartner_Pooling_ShouldNotClearPool_No Pooling = true, }; - // Warm up the pool. + // Keep one connection open so the next Open() cannot reuse it and must perform login. using SqlConnection warmup = new(builder.ConnectionString); warmup.Open(); - warmup.Close(); // Enable the transient error for the next login attempt. server.SetErrorBehavior(true, errorCode); @@ -760,11 +759,25 @@ public void TransientFault_WithUserProvidedPartner_Pooling_ShouldNotClearPool_No // ConnectRetryCount > 0 (default 1) so the client retries and succeeds. using SqlConnection connection = new(builder.ConnectionString); connection.Open(); - Assert.Equal(ConnectionState.Open, connection.State); Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource); + + connection.Close(); + warmup.Close(); + + // If the pool is not cleared, this open should reuse a pooled connection without a new login. + using SqlConnection pooledConnection = new(builder.ConnectionString); + pooledConnection.Open(); + + Assert.Equal(ConnectionState.Open, pooledConnection.State); + Assert.Equal($"localhost,{server.EndPoint.Port}", pooledConnection.DataSource); + + // 1 warmup login + 1 failed login + 1 retry login. + Assert.Equal(3, server.PreLoginCount - server.AbandonedPreLoginCount); + Assert.Equal(3, server.Login7Count); // Failover server must never have been contacted. - Assert.Equal(0, failoverServer.PreLoginCount); + Assert.Equal(0, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount); + Assert.Equal(0, failoverServer.Login7Count); } /// diff --git a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs index 1be1277c9e..5f1adacd61 100644 --- a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs +++ b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs @@ -28,8 +28,9 @@ public class TransientTdsErrorTdsServerArguments : TdsServerArguments /// /// Error class (severity) to emit in ERROR token. - /// Fatal starts at 20 (TdsEnums.FATAL_ERROR_CLASS), so use values below 20 - /// to avoid automatic break/doom behavior in the client. + /// The default is 20 to preserve existing fatal-error behavior. + /// Fatal starts at 20 (TdsEnums.FATAL_ERROR_CLASS), so set values below 20 + /// when a test needs to avoid automatic break/doom behavior in the client. /// public byte ErrorClass { get; set; } = 20; } From c585c25dd25bdeff8bf03ee215bab1e25155380d Mon Sep 17 00:00:00 2001 From: Paul Medynski <31868385+paulmedynski@users.noreply.github.com> Date: Thu, 7 May 2026 12:29:50 -0300 Subject: [PATCH 4/5] Fix colon port syntax in FailoverPartner values in async and retry-disabled failover tests --- .../SimulatedServerTests/ConnectionFailoverTests.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs index dfa79a08d3..1a742bb0de 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs @@ -676,7 +676,7 @@ public async Task TransientFault_WithUserProvidedPartner_Async_ShouldConnectToPr using TdsServer failoverServer = new( new TdsServerArguments { - FailoverPartner = "localhost:1234", + FailoverPartner = "localhost,1234", }); failoverServer.Start(); @@ -685,7 +685,7 @@ public async Task TransientFault_WithUserProvidedPartner_Async_ShouldConnectToPr { IsEnabledTransientError = true, Number = errorCode, - FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", }); server.Start(); @@ -696,7 +696,7 @@ public async Task TransientFault_WithUserProvidedPartner_Async_ShouldConnectToPr ConnectTimeout = 30, ConnectRetryInterval = 1, Encrypt = false, - FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", }; using SqlConnection connection = new(builder.ConnectionString); @@ -797,7 +797,7 @@ public void TransientFault_RetryDisabled_WithUserProvidedPartner_ShouldFail_NotF using TdsServer failoverServer = new( new TdsServerArguments { - FailoverPartner = "localhost:1234", + FailoverPartner = "localhost,1234", }); failoverServer.Start(); @@ -806,7 +806,7 @@ public void TransientFault_RetryDisabled_WithUserProvidedPartner_ShouldFail_NotF { IsEnabledTransientError = true, Number = errorCode, - FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", }); server.Start(); @@ -818,7 +818,7 @@ public void TransientFault_RetryDisabled_WithUserProvidedPartner_ShouldFail_NotF ConnectRetryInterval = 1, ConnectRetryCount = 0, Encrypt = false, - FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", }; using SqlConnection connection = new(builder.ConnectionString); From c4f2cc5ef889d207ca3c7fb8fa82e9317194043b Mon Sep 17 00:00:00 2001 From: Paul Medynski <31868385+paulmedynski@users.noreply.github.com> Date: Mon, 11 May 2026 08:32:20 -0300 Subject: [PATCH 5/5] Add failover legacy AppContext switch and docs --- .github/copilot-instructions.md | 1 + .github/instructions/features.instructions.md | 1 + AGENTS.md | 9 ++-- .../Connection/SqlConnectionInternal.cs | 4 +- .../Data/SqlClient/LocalAppContextSwitches.cs | 25 ++++++++++ .../Common/LocalAppContextSwitchesHelper.cs | 15 ++++++ .../SqlClient/LocalAppContextSwitchesTest.cs | 1 + .../ConnectionFailoverTests.cs | 50 +++++++++++++++++++ 8 files changed, 101 insertions(+), 5 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 0c78f461cb..cebe6927c3 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -50,6 +50,7 @@ This project includes several key products and libraries that facilitate SQL Ser - **Data Encryption**: Supports data encryption for secure data transmission. - **Logging and Diagnostics**: Provides event source tracing diagnostic capabilities for troubleshooting. - **Failover Support**: Handles automatic failover scenarios for high availability. + - Compatibility switch: `Switch.Microsoft.Data.SqlClient.UseLegacyFailoverAlternationOnLoginSqlErrors` (default `false`) can restore legacy alternation behavior in `LoginWithFailover` for login-phase SQL errors. - **Cross-Platform Support**: Compatible with both .NET Framework and .NET Core, allowing applications to run on Windows, Linux, and macOS. - **Column Encryption AKV Provider**: Supports Azure Key Vault (AKV) provider for acquiring keys from Azure Key Vault to be used for encryption and decryption. diff --git a/.github/instructions/features.instructions.md b/.github/instructions/features.instructions.md index 3ecba2cc4e..34262b8db6 100644 --- a/.github/instructions/features.instructions.md +++ b/.github/instructions/features.instructions.md @@ -246,6 +246,7 @@ AppContext switches allow runtime behavior changes without modifying connection | `Switch.Microsoft.Data.SqlClient.EnableMultiSubnetFailoverByDefault` | `false` | Sets `MultiSubnetFailover=true` as the default for all connections | | `Switch.Microsoft.Data.SqlClient.EnableUserAgent` | varies | Controls sending user agent information to SQL Server | | `Switch.Microsoft.Data.SqlClient.IgnoreServerProvidedFailoverPartner` | `false` | Ignores failover partner information sent by the server | +| `Switch.Microsoft.Data.SqlClient.UseLegacyFailoverAlternationOnLoginSqlErrors` | `false` | Restores legacy `LoginWithFailover` alternation for login-phase SQL errors when parser state is not `Closed` | | `Switch.Microsoft.Data.SqlClient.LegacyRowVersionNullBehavior` | `false` | Restores legacy null handling for rowversion columns | | `Switch.Microsoft.Data.SqlClient.LegacyVarTimeZeroScaleBehaviour` | `false` | Restores legacy zero-scale behavior for time/datetime2/datetimeoffset | | `Switch.Microsoft.Data.SqlClient.MakeReadAsyncBlocking` | `false` | Makes ReadAsync behave synchronously (legacy compat) | diff --git a/AGENTS.md b/AGENTS.md index 690e9c9c8d..4c4d7d04e1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -68,10 +68,11 @@ Do **not** create branches directly under `main`, `dev/`, or any other top-level ### Bug Fix Workflow 1. Understand the issue from the bug report 2. Locate relevant code in `src/Microsoft.Data.SqlClient/src/` (do NOT modify legacy `netcore/src/` or `netfx/src/`) -3. Write a failing test that reproduces the issue -4. Implement the fix -5. Ensure all tests pass -6. Update documentation if behavior changes +3. Check `.github/instructions/features.instructions.md` for existing AppContext switches (including failover compatibility switches) before introducing behavior changes +4. Write a failing test that reproduces the issue +5. Implement the fix +6. Ensure all tests pass +7. Update documentation if behavior changes ### Feature Implementation 1. Review the feature specification diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs index fa012d0ba7..6d067bab7f 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs @@ -3635,13 +3635,15 @@ private void LoginWithFailover( continue; } + bool isLoginPhaseSqlError = _parser?.State is not TdsParserState.Closed; + // If state != closed, indicates that the parser encountered an error while // processing the login response (e.g. an explicit error token). Transient // network errors that impact connectivity will result in parser state being // closed. Only network-level errors should trigger failover alternation; // login-phase errors (like transient errors) should be thrown so they can // be handled by the outer ConnectRetryCount loop. - if (_parser?.State is not TdsParserState.Closed || + if ((isLoginPhaseSqlError && !LocalAppContextSwitches.UseLegacyFailoverAlternationOnLoginSqlErrors) || IsDoNotRetryConnectError(sqlex) || timeout.IsExpired) { diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs index 16e0abb7ec..0177338bf2 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs @@ -58,6 +58,13 @@ internal static class LocalAppContextSwitches private const string IgnoreServerProvidedFailoverPartnerString = "Switch.Microsoft.Data.SqlClient.IgnoreServerProvidedFailoverPartner"; + /// + /// The name of the app context switch that controls whether failover + /// alternation should use legacy behavior for login-phase SQL errors. + /// + private const string UseLegacyFailoverAlternationOnLoginSqlErrorsString = + "Switch.Microsoft.Data.SqlClient.UseLegacyFailoverAlternationOnLoginSqlErrors"; + /// /// The name of the app context switch that controls whether to preserve /// legacy behavior where Timestamp/RowVersion fields return empty byte @@ -182,6 +189,11 @@ private enum SwitchValue : byte /// private static SwitchValue s_ignoreServerProvidedFailoverPartner = SwitchValue.None; + /// + /// The cached value of the UseLegacyFailoverAlternationOnLoginSqlErrors switch. + /// + private static SwitchValue s_useLegacyFailoverAlternationOnLoginSqlErrors = SwitchValue.None; + /// /// The cached value of the LegacyRowVersionNullBehavior switch. /// @@ -409,6 +421,19 @@ public static bool GlobalizationInvariantMode defaultValue: false, ref s_ignoreServerProvidedFailoverPartner); + /// + /// When set to true, LoginWithFailover preserves legacy behavior and may + /// alternate to the failover partner on login-phase SQL errors where the + /// parser state is not Closed. + /// + /// The default value of this switch is false. + /// + public static bool UseLegacyFailoverAlternationOnLoginSqlErrors => + AcquireAndReturn( + UseLegacyFailoverAlternationOnLoginSqlErrorsString, + defaultValue: false, + ref s_useLegacyFailoverAlternationOnLoginSqlErrors); + /// /// In System.Data.SqlClient and Microsoft.Data.SqlClient prior to 3.0.0 a /// field with type Timestamp/RowVersion would return an empty byte array. diff --git a/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs b/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs index 8102f47d51..11523b3f83 100644 --- a/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs +++ b/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs @@ -47,6 +47,7 @@ public sealed class LocalAppContextSwitchesHelper : IDisposable private readonly bool? _globalizationInvariantModeOriginal; #endif private readonly bool? _ignoreServerProvidedFailoverPartnerOriginal; + private readonly bool? _useLegacyFailoverAlternationOnLoginSqlErrorsOriginal; private readonly bool? _legacyRowVersionNullBehaviorOriginal; private readonly bool? _legacyVarTimeZeroScaleBehaviourOriginal; private readonly bool? _makeReadAsyncBlockingOriginal; @@ -98,6 +99,8 @@ public LocalAppContextSwitchesHelper() #endif _ignoreServerProvidedFailoverPartnerOriginal = GetSwitchValue("s_ignoreServerProvidedFailoverPartner"); + _useLegacyFailoverAlternationOnLoginSqlErrorsOriginal = + GetSwitchValue("s_useLegacyFailoverAlternationOnLoginSqlErrors"); _legacyRowVersionNullBehaviorOriginal = GetSwitchValue("s_legacyRowVersionNullBehavior"); _legacyVarTimeZeroScaleBehaviourOriginal = @@ -154,6 +157,9 @@ public void Dispose() SetSwitchValue( "s_ignoreServerProvidedFailoverPartner", _ignoreServerProvidedFailoverPartnerOriginal); + SetSwitchValue( + "s_useLegacyFailoverAlternationOnLoginSqlErrors", + _useLegacyFailoverAlternationOnLoginSqlErrorsOriginal); SetSwitchValue( "s_legacyRowVersionNullBehavior", _legacyRowVersionNullBehaviorOriginal); @@ -242,6 +248,15 @@ public bool? IgnoreServerProvidedFailoverPartner set => SetSwitchValue("s_ignoreServerProvidedFailoverPartner", value); } + /// + /// Get or set the UseLegacyFailoverAlternationOnLoginSqlErrors switch value. + /// + public bool? UseLegacyFailoverAlternationOnLoginSqlErrors + { + get => GetSwitchValue("s_useLegacyFailoverAlternationOnLoginSqlErrors"); + set => SetSwitchValue("s_useLegacyFailoverAlternationOnLoginSqlErrors", value); + } + /// /// Get or set the LegacyRowVersionNullBehavior switch value. /// diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs index a5db02faa0..a468d8fd37 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs @@ -28,6 +28,7 @@ public void TestDefaultAppContextSwitchValues() Assert.False(LocalAppContextSwitches.UseConnectionPoolV2); Assert.False(LocalAppContextSwitches.TruncateScaledDecimal); Assert.False(LocalAppContextSwitches.IgnoreServerProvidedFailoverPartner); + Assert.False(LocalAppContextSwitches.UseLegacyFailoverAlternationOnLoginSqlErrors); Assert.False(LocalAppContextSwitches.EnableMultiSubnetFailoverByDefault); #if NET Assert.False(LocalAppContextSwitches.GlobalizationInvariantMode); diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs index 1a742bb0de..1adc4dd6e4 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs @@ -881,5 +881,55 @@ public void NonFatalTransientLoginError_WithUserProvidedPartner_ShouldRetryPrima Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount); Assert.Equal(0, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount); } + + /// + /// Verifies opt-in legacy behavior: login-phase SQL errors can alternate to the + /// failover partner when UseLegacyFailoverAlternationOnLoginSqlErrors is enabled. + /// + [Fact] + public void NonFatalTransientLoginError_WithLegacySwitch_ShouldAlternateToFailoverPartner() + { + using LocalAppContextSwitchesHelper switchesHelper = new(); + switchesHelper.UseLegacyFailoverAlternationOnLoginSqlErrors = true; + + using TdsServer failoverServer = new( + new TdsServerArguments + { + FailoverPartner = "localhost,1234", + }); + failoverServer.Start(); + + using TransientTdsErrorTdsServer server = new( + new TransientTdsErrorTdsServerArguments() + { + IsEnabledTransientError = true, + Number = 40613, + // Keep the login token non-fatal so parser state, not break/doom behavior, + // drives this branch decision. + ErrorClass = 16, + RepeatCount = 1, + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + }); + server.Start(); + + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.EndPoint.Port}", + InitialCatalog = "master", + ConnectTimeout = 30, + ConnectRetryInterval = 1, + Encrypt = false, + Pooling = false, + FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", + }; + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + Assert.Equal(ConnectionState.Open, connection.State); + Assert.Equal($"localhost,{failoverServer.EndPoint.Port}", connection.DataSource); + Assert.Equal(1, server.PreLoginCount - server.AbandonedPreLoginCount); + Assert.Equal(1, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount); + } } }