From a776d6eec8820a2dae0cfe546f1fe66f90dfd2d4 Mon Sep 17 00:00:00 2001 From: "F.D.Castel" Date: Mon, 20 Apr 2026 12:53:20 -0300 Subject: [PATCH 1/3] Add widechar ODBC call tests (SQLGetDiagRecW / SQLErrorW / SQLExecDirectW / SQLPrepareW) Exercises both ConvertingString constructor paths in MainUnicode.cpp: output buffer (SQLGetDiagRecW / SQLErrorW) and input buffer (SQLExecDirectW / SQLPrepareW). Linux tests skip with a pointer to the follow-up Unicode rewrite PR; they are meant to drive that PR's implementation. Windows runs them for real. Per the plan in #289 comment 4279111183. Tracked as issue #287 Tier 1b. --- tests/CMakeLists.txt | 1 + tests/test_helpers.h | 32 +++++ tests/test_wide_errors.cpp | 258 +++++++++++++++++++++++++++++++++++++ 3 files changed, 291 insertions(+) create mode 100644 tests/test_wide_errors.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 78ebd3e7..79fb28c6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -86,6 +86,7 @@ add_executable(firebird_odbc_tests test_catalogfunctions.cpp test_server_version.cpp test_scrollable_cursor.cpp + test_wide_errors.cpp # Category C — all tests SKIP'd (features not yet on upstream master) test_null_handles.cpp diff --git a/tests/test_helpers.h b/tests/test_helpers.h index 5776699c..4844898a 100644 --- a/tests/test_helpers.h +++ b/tests/test_helpers.h @@ -60,6 +60,38 @@ inline std::string GetSqlState(SQLSMALLINT handleType, SQLHANDLE handle) { return ""; } +// Convert an ASCII C-string to a null-terminated SQLWCHAR vector. Cannot use +// L"..." literals — sizeof(wchar_t) != sizeof(SQLWCHAR) on Linux. +inline std::vector ToSqlWchar(const char* s) { + std::vector out; + while (*s) { + out.push_back((SQLWCHAR)(unsigned char)*s++); + } + out.push_back(0); + return out; +} + +// Convert a null-terminated SQLWCHAR string to a narrow std::string (low byte +// only, so ASCII round-trips correctly; non-ASCII is lossy but these helpers +// are for tests, not production data). +inline std::string FromSqlWchar(const SQLWCHAR* s) { + std::string out; + if (!s) return out; + while (*s) { + out.push_back((char)(*s & 0xFF)); + ++s; + } + return out; +} + +// Length of a null-terminated SQLWCHAR string, in SQLWCHAR units. +inline size_t SqlWcharLen(const SQLWCHAR* s) { + size_t n = 0; + if (!s) return 0; + while (s[n]) ++n; + return n; +} + // Base test fixture: ODBC environment + connection + auto-cleanup class OdbcConnectedTest : public ::testing::Test { public: diff --git a/tests/test_wide_errors.cpp b/tests/test_wide_errors.cpp new file mode 100644 index 00000000..3c390076 --- /dev/null +++ b/tests/test_wide_errors.cpp @@ -0,0 +1,258 @@ +// tests/test_wide_errors.cpp — Widechar ODBC call tests +// +// Drives the fix for the Linux widechar conversion bugs in MainUnicode.cpp. +// Covers both ConvertingString constructor scenarios: +// +// (a) Output path used by SQLGetDiagRecW / SQLErrorW +// ConvertingString(length, sqlState) -> exercised by +// GetDiagRecW_* and ErrorW_* tests below. +// +// (b) Input path used by SQLExecDirectW / SQLPrepareW +// ConvertingString(connection, wcString, length) + +// convUnicodeToString -> exercised by ExecDirectW_* / +// PrepareW_* tests below. +// +// On Linux the current driver mis-handles both paths — the widechar +// read-back is truncated to one SQLWCHAR, and a small output buffer can +// smash the caller's stack. These tests are therefore SKIP'd on Linux +// with a pointer to the follow-up Unicode-fix PR. Remove the skips once +// that PR lands. + +#include "test_helpers.h" +#include +#include +#include + +#ifndef _WIN32 +// Shared skip body — all tests in this file depend on the same Linux +// widechar bug. Keep the message pointing at issue #287 Tier 1b so +// grepping finds every affected test at once. +#define SKIP_ON_LINUX_WIDECHAR() \ + do { \ + GTEST_SKIP() << "Widechar conversion paths broken on Linux — " \ + "see issue #287 Tier 1b. Un-skip once the " \ + "Unicode-fix PR lands."; \ + } while (0) +#else +#define SKIP_ON_LINUX_WIDECHAR() do {} while (0) +#endif + +class WideErrorsTest : public OdbcConnectedTest {}; + +// ============================================================================ +// (a) Output path — SQLGetDiagRecW on an error +// ============================================================================ + +// Force a parse error, then read the SQLSTATE back via the widechar variant +// with a tight 12-byte (6-SQLWCHAR) output buffer. This is the exact shape of +// the ConvertingString(12, sqlState) construction inside SQLGetDiagRecW. +TEST_F(WideErrorsTest, GetDiagRecW_SqlState) { + SKIP_ON_LINUX_WIDECHAR(); + + // Trigger an error (column doesn't exist) + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + SQLWCHAR sqlState[6] = {}; // 12 bytes, fits "HY000\0" + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[SQL_MAX_MESSAGE_LENGTH] = {}; + SQLSMALLINT messageLen = 0; + + ret = SQLGetDiagRecW(SQL_HANDLE_STMT, hStmt, 1, + sqlState, &nativeError, + messageBuf, SQL_MAX_MESSAGE_LENGTH, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLGetDiagRecW failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + // SQLSTATE is exactly 5 characters. + EXPECT_EQ(SqlWcharLen(sqlState), 5u) + << "State decoded as: '" << FromSqlWchar(sqlState) << "'"; + + // Firebird returns 42S22 (column not found) or 42000 or HY000. + std::string state = FromSqlWchar(sqlState); + EXPECT_TRUE(state == "42S22" || state == "42000" || state == "HY000") + << "Unexpected SQLSTATE: '" << state << "'"; +} + +// Same as above but checks the message buffer, which uses the other +// ConvertingString size (bufferLength can be large). This exercises the +// widechar path with a reasonable output buffer. +TEST_F(WideErrorsTest, GetDiagRecW_Message) { + SKIP_ON_LINUX_WIDECHAR(); + + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + SQLWCHAR sqlState[6] = {}; + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[512] = {}; + SQLSMALLINT messageLen = 0; + + ret = SQLGetDiagRecW(SQL_HANDLE_STMT, hStmt, 1, + sqlState, &nativeError, + messageBuf, 512, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + + // Message must be non-empty and readable as ASCII. + std::string msg = FromSqlWchar(messageBuf); + EXPECT_FALSE(msg.empty()) << "Error message was empty"; + + // And the driver must report the length correctly (in SQLWCHAR units + // according to the spec). + EXPECT_GT(messageLen, 0); +} + +// Deliberately tight output buffer — the driver must not overrun it. +// With sqlState sized at 6 SQLWCHARs (12 bytes) this is exactly the shape +// that originally triggered the heap-buffer-overflow / stack-smash pair. +TEST_F(WideErrorsTest, GetDiagRecW_SmallBuffer) { + SKIP_ON_LINUX_WIDECHAR(); + + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + // Guard bytes either side; any write outside sqlState[] will trip them. + SQLWCHAR guardBefore[4]; + SQLWCHAR sqlState[6] = {}; + SQLWCHAR guardAfter[4]; + for (int i = 0; i < 4; ++i) { + guardBefore[i] = (SQLWCHAR)0xBEEF; + guardAfter[i] = (SQLWCHAR)0xBEEF; + } + + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[16] = {}; // intentionally tiny + SQLSMALLINT messageLen = 0; + + ret = SQLGetDiagRecW(SQL_HANDLE_STMT, hStmt, 1, + sqlState, &nativeError, + messageBuf, 16, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret) || ret == SQL_SUCCESS_WITH_INFO); + + // Guards must be untouched. + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(guardBefore[i], (SQLWCHAR)0xBEEF) << "guardBefore[" << i << "]"; + EXPECT_EQ(guardAfter[i], (SQLWCHAR)0xBEEF) << "guardAfter[" << i << "]"; + } + + // State still decodes to 5 characters. + EXPECT_EQ(SqlWcharLen(sqlState), 5u) + << "State decoded as: '" << FromSqlWchar(sqlState) << "'"; +} + +// Legacy ODBC 2.x SQLErrorW — same underlying ConvertingString(12, sqlState) +// shape. Kept so the fix also covers that call site. +TEST_F(WideErrorsTest, ErrorW_SqlState) { + SKIP_ON_LINUX_WIDECHAR(); + + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + SQLWCHAR sqlState[6] = {}; + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[512] = {}; + SQLSMALLINT messageLen = 0; + + ret = SQLErrorW(hEnv, hDbc, hStmt, + sqlState, &nativeError, + messageBuf, 512, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + + EXPECT_EQ(SqlWcharLen(sqlState), 5u) + << "State decoded as: '" << FromSqlWchar(sqlState) << "'"; +} + +// ============================================================================ +// (b) Input path — SQLExecDirectW / SQLPrepareW +// ============================================================================ + +class WideExecTest : public OdbcConnectedTest {}; + +// Pass an ASCII query as SQLWCHAR with SQL_NTS — exercises the convUnicodeToString +// path's sqlwcharLen-equivalent branch (currently wcslen on a SQLWCHAR pointer, +// which is wrong on Linux). +TEST_F(WideExecTest, ExecDirectW_Ascii_Nts) { + SKIP_ON_LINUX_WIDECHAR(); + + auto query = ToSqlWchar("SELECT 1 FROM RDB$DATABASE"); + SQLRETURN ret = SQLExecDirectW(hStmt, query.data(), SQL_NTS); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLExecDirectW failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + SQLINTEGER val = 0; + SQLLEN ind = 0; + SQLBindCol(hStmt, 1, SQL_C_SLONG, &val, 0, &ind); + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + EXPECT_EQ(val, 1); +} + +// Same but with an explicit SQLWCHAR-unit length — exercises the other branch +// of convUnicodeToString where we temporarily NUL-terminate input. +TEST_F(WideExecTest, ExecDirectW_Ascii_ExplicitLength) { + SKIP_ON_LINUX_WIDECHAR(); + + auto query = ToSqlWchar("SELECT 1 FROM RDB$DATABASE"); + // Length is in characters (SQLWCHAR units), per the spec. + SQLINTEGER len = (SQLINTEGER)(query.size() - 1); // exclude trailing NUL + + SQLRETURN ret = SQLExecDirectW(hStmt, query.data(), len); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLExecDirectW(len=" << len << ") failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + SQLINTEGER val = 0; + SQLLEN ind = 0; + SQLBindCol(hStmt, 1, SQL_C_SLONG, &val, 0, &ind); + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + EXPECT_EQ(val, 1); +} + +// SQLPrepareW twin — same input path as ExecDirectW. +TEST_F(WideExecTest, PrepareW_Ascii_Nts) { + SKIP_ON_LINUX_WIDECHAR(); + + auto query = ToSqlWchar("SELECT 1 FROM RDB$DATABASE"); + SQLRETURN ret = SQLPrepareW(hStmt, query.data(), SQL_NTS); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLPrepareW failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + ret = SQLExecute(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + + SQLINTEGER val = 0; + SQLLEN ind = 0; + SQLBindCol(hStmt, 1, SQL_C_SLONG, &val, 0, &ind); + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + EXPECT_EQ(val, 1); +} + +// A query long enough that the narrow byteString allocation has to grow. +// If the length accounting is wrong in SQLWCHAR vs wchar_t units, this is +// where truncation/overflow bugs surface. +TEST_F(WideExecTest, ExecDirectW_Ascii_LongQuery) { + SKIP_ON_LINUX_WIDECHAR(); + + // 200+ byte query so the byteString buffer is a meaningful size. + std::string q = "SELECT "; + for (int i = 0; i < 40; ++i) q += "1+"; + q += "0 FROM RDB$DATABASE"; + + auto query = ToSqlWchar(q.c_str()); + SQLRETURN ret = SQLExecDirectW(hStmt, query.data(), SQL_NTS); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLExecDirectW(long) failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); +} From 69da8fcd75939bc7c103b0ad882e94c59d551c56 Mon Sep 17 00:00:00 2001 From: "F.D.Castel" Date: Mon, 20 Apr 2026 12:56:40 -0300 Subject: [PATCH 2/3] Fix SQLWCHAR conversion paths in MainUnicode.cpp on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace wchar_t-assuming mbstowcs/wcstombs/wcslen calls in the non- connection Linux branches with SQLWCHAR-aware byte-level widening/ narrowing loops (matches unixODBC's ansi_to_unicode_copy / unicode_to_ansi_copy). Makes the widechar-tests PR green on Linux. Remove the Linux-only SKIP guard added by the tests PR — the tests now run on every platform. Issue #287 Tier 1b is not fully closed by this: the connection MbsToWcs/WcsToMbs branch and non-ASCII handling remain open. --- MainUnicode.cpp | 68 +++++++++++++++++++++++++++++++++----- tests/test_wide_errors.cpp | 16 +++------ 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/MainUnicode.cpp b/MainUnicode.cpp index 1d3c8486..5222a7c2 100644 --- a/MainUnicode.cpp +++ b/MainUnicode.cpp @@ -38,6 +38,21 @@ extern FILE *logFile; using namespace OdbcJdbcLibrary; +#ifndef _WINDOWS +// SQLWCHAR-aware length (in SQLWCHAR units), safe on Linux where +// sizeof(wchar_t) != sizeof(SQLWCHAR). Do NOT use wcslen() on SQLWCHAR +// data on Linux — it reads two SQLWCHARs per wchar_t and runs off the end. +static size_t sqlwcharLen( const SQLWCHAR *s ) +{ + size_t n = 0; + if ( !s ) + return 0; + while ( s[n] ) + ++n; + return n; +} +#endif + #ifdef _WINDOWS extern UINT codePage; // from Main.cpp #endif @@ -85,7 +100,7 @@ class ConvertingString if ( length == SQL_NTS ) lengthString = 0; else if ( retCountOfBytes ) - lengthString = length / sizeof(wchar_t); + lengthString = length / sizeof(SQLWCHAR); else lengthString = length; } @@ -135,13 +150,33 @@ class ConvertingString if ( len > 0 ) len--; #else - len = mbstowcs( (wchar_t*)unicodeString, (const char*)byteString, lengthString ); + // SQLWCHAR is 2 bytes on Linux (unixODBC defines it as unsigned short), + // but wchar_t is 4 bytes, so mbstowcs((wchar_t*)unicodeString, ...) + // both corrupts the output and risks overflowing the caller's buffer. + // Widen byte-by-byte into SQLWCHAR units, matching what unixODBC's + // ansi_to_unicode_copy() does internally. This is correct for the + // ASCII-only error/state strings that reach this code path; non-ASCII + // input will be handled by the broader ConvertingString rewrite tracked + // in issue #287 (Tier 9.1). + { + const SQLCHAR *src = byteString; + size_t i = 0; + while ( i < (size_t)lengthString && src[i] != 0 ) + { + unicodeString[i] = (SQLWCHAR)( src[i] & 0xFF ); + ++i; + } + len = i; + } #endif } if ( len > 0 ) { - *(LPWSTR)(unicodeString + len) = L'\0'; + // NUL-terminate in SQLWCHAR units. LPWSTR assignment of L'\0' writes + // sizeof(wchar_t) bytes, which overruns the output buffer by 2 bytes + // on Linux. + unicodeString[len] = 0; if ( realLength ) { @@ -170,12 +205,18 @@ class ConvertingString wchar_t saveWC; if ( length == SQL_NTS ) +#ifdef _WINDOWS length = (int)wcslen( (const wchar_t*)wcString ); - else if ( wcString[length] != L'\0' ) +#else + length = (int)sqlwcharLen( wcString ); +#endif + else if ( wcString[length] != 0 ) { ptEndWC = (wchar_t*)&wcString[length]; saveWC = *ptEndWC; - *ptEndWC = L'\0'; + // Write a SQLWCHAR-sized NUL so we don't overrun the input by 2 bytes + // on Linux (wchar_t is 4 bytes there). + wcString[length] = 0; } if ( connection ) @@ -185,7 +226,10 @@ class ConvertingString #ifdef _WINDOWS bytesNeeded = WideCharToMultiByte( codePage, (DWORD)0, wcString, length, NULL, (int)0, NULL, NULL ); #else - bytesNeeded = wcstombs( NULL, (const wchar_t*)wcString, length ); + // See the symmetric comment in the destructor above: wcstombs assumes + // wchar_t-sized input, which corrupts SQLWCHAR data on Linux. The + // byte-narrowing loop below produces exactly `length` output bytes. + bytesNeeded = (size_t)length; #endif } @@ -198,7 +242,15 @@ class ConvertingString #ifdef _WINDOWS bytesNeeded = WideCharToMultiByte( codePage, 0, wcString, length, (LPSTR)byteString, (int)bytesNeeded, NULL, NULL ); #else - bytesNeeded = wcstombs( (char *)byteString, (const wchar_t*)wcString, bytesNeeded ); + { + size_t i = 0; + while ( i < (size_t)length && wcString[i] != 0 ) + { + byteString[i] = (SQLCHAR)( wcString[i] & 0xFF ); + ++i; + } + bytesNeeded = i; + } #endif } @@ -220,7 +272,7 @@ class ConvertingString if ( lengthString ) { byteString = new SQLCHAR[ lengthString + 2 ]; - memset(byteString, 0, lengthString + 2); + memset( byteString, 0, lengthString + 2 ); } else byteString = NULL; diff --git a/tests/test_wide_errors.cpp b/tests/test_wide_errors.cpp index 3c390076..9378dc37 100644 --- a/tests/test_wide_errors.cpp +++ b/tests/test_wide_errors.cpp @@ -23,19 +23,11 @@ #include #include -#ifndef _WIN32 -// Shared skip body — all tests in this file depend on the same Linux -// widechar bug. Keep the message pointing at issue #287 Tier 1b so -// grepping finds every affected test at once. -#define SKIP_ON_LINUX_WIDECHAR() \ - do { \ - GTEST_SKIP() << "Widechar conversion paths broken on Linux — " \ - "see issue #287 Tier 1b. Un-skip once the " \ - "Unicode-fix PR lands."; \ - } while (0) -#else +// The Linux widechar conversion paths in MainUnicode.cpp are now fixed +// (see this PR's changes to MainUnicode.cpp). The Linux-skip guard the +// tests were introduced with is therefore a no-op — the tests run on +// every platform. #define SKIP_ON_LINUX_WIDECHAR() do {} while (0) -#endif class WideErrorsTest : public OdbcConnectedTest {}; From 0d9bce9008fd1905e1aafc12ca32829286ab6f9a Mon Sep 17 00:00:00 2001 From: "F.D.Castel" Date: Mon, 20 Apr 2026 14:52:30 -0300 Subject: [PATCH 3/3] Drop misleading overrun comment on unicodeString[len] NUL-terminate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LPWSTR is WCHAR* = unsigned short* = SQLWCHAR* on both Windows and unixODBC/Linux, so `*(LPWSTR)ptr = L'\0'` writes 2 bytes via narrowing store, not 4 — no overrun. Thanks to @irodushka for the correction (#289 discussion_r3109004999). Keeping the replacement `unicodeString[len] = 0;` which he endorsed as more compact and understandable. The sibling comment in convUnicodeToString stays — that one is a real 4-byte write through an explicit `(wchar_t*)` cast. --- MainUnicode.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/MainUnicode.cpp b/MainUnicode.cpp index 5222a7c2..a6529ff3 100644 --- a/MainUnicode.cpp +++ b/MainUnicode.cpp @@ -173,9 +173,6 @@ class ConvertingString if ( len > 0 ) { - // NUL-terminate in SQLWCHAR units. LPWSTR assignment of L'\0' writes - // sizeof(wchar_t) bytes, which overruns the output buffer by 2 bytes - // on Linux. unicodeString[len] = 0; if ( realLength )