diff --git a/MainUnicode.cpp b/MainUnicode.cpp index 1d3c8486..a6529ff3 100644 --- a/MainUnicode.cpp +++ b/MainUnicode.cpp @@ -38,6 +38,21 @@ extern FILE *logFile; using namespace OdbcJdbcLibrary; +#ifndef _WINDOWS +// SQLWCHAR-aware length (in SQLWCHAR units), safe on Linux where +// sizeof(wchar_t) != sizeof(SQLWCHAR). Do NOT use wcslen() on SQLWCHAR +// data on Linux — it reads two SQLWCHARs per wchar_t and runs off the end. +static size_t sqlwcharLen( const SQLWCHAR *s ) +{ + size_t n = 0; + if ( !s ) + return 0; + while ( s[n] ) + ++n; + return n; +} +#endif + #ifdef _WINDOWS extern UINT codePage; // from Main.cpp #endif @@ -85,7 +100,7 @@ class ConvertingString if ( length == SQL_NTS ) lengthString = 0; else if ( retCountOfBytes ) - lengthString = length / sizeof(wchar_t); + lengthString = length / sizeof(SQLWCHAR); else lengthString = length; } @@ -135,13 +150,30 @@ class ConvertingString if ( len > 0 ) len--; #else - len = mbstowcs( (wchar_t*)unicodeString, (const char*)byteString, lengthString ); + // SQLWCHAR is 2 bytes on Linux (unixODBC defines it as unsigned short), + // but wchar_t is 4 bytes, so mbstowcs((wchar_t*)unicodeString, ...) + // both corrupts the output and risks overflowing the caller's buffer. + // Widen byte-by-byte into SQLWCHAR units, matching what unixODBC's + // ansi_to_unicode_copy() does internally. This is correct for the + // ASCII-only error/state strings that reach this code path; non-ASCII + // input will be handled by the broader ConvertingString rewrite tracked + // in issue #287 (Tier 9.1). + { + const SQLCHAR *src = byteString; + size_t i = 0; + while ( i < (size_t)lengthString && src[i] != 0 ) + { + unicodeString[i] = (SQLWCHAR)( src[i] & 0xFF ); + ++i; + } + len = i; + } #endif } if ( len > 0 ) { - *(LPWSTR)(unicodeString + len) = L'\0'; + unicodeString[len] = 0; if ( realLength ) { @@ -170,12 +202,18 @@ class ConvertingString wchar_t saveWC; if ( length == SQL_NTS ) +#ifdef _WINDOWS length = (int)wcslen( (const wchar_t*)wcString ); - else if ( wcString[length] != L'\0' ) +#else + length = (int)sqlwcharLen( wcString ); +#endif + else if ( wcString[length] != 0 ) { ptEndWC = (wchar_t*)&wcString[length]; saveWC = *ptEndWC; - *ptEndWC = L'\0'; + // Write a SQLWCHAR-sized NUL so we don't overrun the input by 2 bytes + // on Linux (wchar_t is 4 bytes there). + wcString[length] = 0; } if ( connection ) @@ -185,7 +223,10 @@ class ConvertingString #ifdef _WINDOWS bytesNeeded = WideCharToMultiByte( codePage, (DWORD)0, wcString, length, NULL, (int)0, NULL, NULL ); #else - bytesNeeded = wcstombs( NULL, (const wchar_t*)wcString, length ); + // See the symmetric comment in the destructor above: wcstombs assumes + // wchar_t-sized input, which corrupts SQLWCHAR data on Linux. The + // byte-narrowing loop below produces exactly `length` output bytes. + bytesNeeded = (size_t)length; #endif } @@ -198,7 +239,15 @@ class ConvertingString #ifdef _WINDOWS bytesNeeded = WideCharToMultiByte( codePage, 0, wcString, length, (LPSTR)byteString, (int)bytesNeeded, NULL, NULL ); #else - bytesNeeded = wcstombs( (char *)byteString, (const wchar_t*)wcString, bytesNeeded ); + { + size_t i = 0; + while ( i < (size_t)length && wcString[i] != 0 ) + { + byteString[i] = (SQLCHAR)( wcString[i] & 0xFF ); + ++i; + } + bytesNeeded = i; + } #endif } @@ -220,7 +269,7 @@ class ConvertingString if ( lengthString ) { byteString = new SQLCHAR[ lengthString + 2 ]; - memset(byteString, 0, lengthString + 2); + memset( byteString, 0, lengthString + 2 ); } else byteString = NULL; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 78ebd3e7..79fb28c6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -86,6 +86,7 @@ add_executable(firebird_odbc_tests test_catalogfunctions.cpp test_server_version.cpp test_scrollable_cursor.cpp + test_wide_errors.cpp # Category C — all tests SKIP'd (features not yet on upstream master) test_null_handles.cpp diff --git a/tests/test_helpers.h b/tests/test_helpers.h index 5776699c..4844898a 100644 --- a/tests/test_helpers.h +++ b/tests/test_helpers.h @@ -60,6 +60,38 @@ inline std::string GetSqlState(SQLSMALLINT handleType, SQLHANDLE handle) { return ""; } +// Convert an ASCII C-string to a null-terminated SQLWCHAR vector. Cannot use +// L"..." literals — sizeof(wchar_t) != sizeof(SQLWCHAR) on Linux. +inline std::vector ToSqlWchar(const char* s) { + std::vector out; + while (*s) { + out.push_back((SQLWCHAR)(unsigned char)*s++); + } + out.push_back(0); + return out; +} + +// Convert a null-terminated SQLWCHAR string to a narrow std::string (low byte +// only, so ASCII round-trips correctly; non-ASCII is lossy but these helpers +// are for tests, not production data). +inline std::string FromSqlWchar(const SQLWCHAR* s) { + std::string out; + if (!s) return out; + while (*s) { + out.push_back((char)(*s & 0xFF)); + ++s; + } + return out; +} + +// Length of a null-terminated SQLWCHAR string, in SQLWCHAR units. +inline size_t SqlWcharLen(const SQLWCHAR* s) { + size_t n = 0; + if (!s) return 0; + while (s[n]) ++n; + return n; +} + // Base test fixture: ODBC environment + connection + auto-cleanup class OdbcConnectedTest : public ::testing::Test { public: diff --git a/tests/test_wide_errors.cpp b/tests/test_wide_errors.cpp new file mode 100644 index 00000000..9378dc37 --- /dev/null +++ b/tests/test_wide_errors.cpp @@ -0,0 +1,250 @@ +// tests/test_wide_errors.cpp — Widechar ODBC call tests +// +// Drives the fix for the Linux widechar conversion bugs in MainUnicode.cpp. +// Covers both ConvertingString constructor scenarios: +// +// (a) Output path used by SQLGetDiagRecW / SQLErrorW +// ConvertingString(length, sqlState) -> exercised by +// GetDiagRecW_* and ErrorW_* tests below. +// +// (b) Input path used by SQLExecDirectW / SQLPrepareW +// ConvertingString(connection, wcString, length) + +// convUnicodeToString -> exercised by ExecDirectW_* / +// PrepareW_* tests below. +// +// On Linux the current driver mis-handles both paths — the widechar +// read-back is truncated to one SQLWCHAR, and a small output buffer can +// smash the caller's stack. These tests are therefore SKIP'd on Linux +// with a pointer to the follow-up Unicode-fix PR. Remove the skips once +// that PR lands. + +#include "test_helpers.h" +#include +#include +#include + +// The Linux widechar conversion paths in MainUnicode.cpp are now fixed +// (see this PR's changes to MainUnicode.cpp). The Linux-skip guard the +// tests were introduced with is therefore a no-op — the tests run on +// every platform. +#define SKIP_ON_LINUX_WIDECHAR() do {} while (0) + +class WideErrorsTest : public OdbcConnectedTest {}; + +// ============================================================================ +// (a) Output path — SQLGetDiagRecW on an error +// ============================================================================ + +// Force a parse error, then read the SQLSTATE back via the widechar variant +// with a tight 12-byte (6-SQLWCHAR) output buffer. This is the exact shape of +// the ConvertingString(12, sqlState) construction inside SQLGetDiagRecW. +TEST_F(WideErrorsTest, GetDiagRecW_SqlState) { + SKIP_ON_LINUX_WIDECHAR(); + + // Trigger an error (column doesn't exist) + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + SQLWCHAR sqlState[6] = {}; // 12 bytes, fits "HY000\0" + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[SQL_MAX_MESSAGE_LENGTH] = {}; + SQLSMALLINT messageLen = 0; + + ret = SQLGetDiagRecW(SQL_HANDLE_STMT, hStmt, 1, + sqlState, &nativeError, + messageBuf, SQL_MAX_MESSAGE_LENGTH, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLGetDiagRecW failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + // SQLSTATE is exactly 5 characters. + EXPECT_EQ(SqlWcharLen(sqlState), 5u) + << "State decoded as: '" << FromSqlWchar(sqlState) << "'"; + + // Firebird returns 42S22 (column not found) or 42000 or HY000. + std::string state = FromSqlWchar(sqlState); + EXPECT_TRUE(state == "42S22" || state == "42000" || state == "HY000") + << "Unexpected SQLSTATE: '" << state << "'"; +} + +// Same as above but checks the message buffer, which uses the other +// ConvertingString size (bufferLength can be large). This exercises the +// widechar path with a reasonable output buffer. +TEST_F(WideErrorsTest, GetDiagRecW_Message) { + SKIP_ON_LINUX_WIDECHAR(); + + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + SQLWCHAR sqlState[6] = {}; + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[512] = {}; + SQLSMALLINT messageLen = 0; + + ret = SQLGetDiagRecW(SQL_HANDLE_STMT, hStmt, 1, + sqlState, &nativeError, + messageBuf, 512, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + + // Message must be non-empty and readable as ASCII. + std::string msg = FromSqlWchar(messageBuf); + EXPECT_FALSE(msg.empty()) << "Error message was empty"; + + // And the driver must report the length correctly (in SQLWCHAR units + // according to the spec). + EXPECT_GT(messageLen, 0); +} + +// Deliberately tight output buffer — the driver must not overrun it. +// With sqlState sized at 6 SQLWCHARs (12 bytes) this is exactly the shape +// that originally triggered the heap-buffer-overflow / stack-smash pair. +TEST_F(WideErrorsTest, GetDiagRecW_SmallBuffer) { + SKIP_ON_LINUX_WIDECHAR(); + + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + // Guard bytes either side; any write outside sqlState[] will trip them. + SQLWCHAR guardBefore[4]; + SQLWCHAR sqlState[6] = {}; + SQLWCHAR guardAfter[4]; + for (int i = 0; i < 4; ++i) { + guardBefore[i] = (SQLWCHAR)0xBEEF; + guardAfter[i] = (SQLWCHAR)0xBEEF; + } + + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[16] = {}; // intentionally tiny + SQLSMALLINT messageLen = 0; + + ret = SQLGetDiagRecW(SQL_HANDLE_STMT, hStmt, 1, + sqlState, &nativeError, + messageBuf, 16, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret) || ret == SQL_SUCCESS_WITH_INFO); + + // Guards must be untouched. + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(guardBefore[i], (SQLWCHAR)0xBEEF) << "guardBefore[" << i << "]"; + EXPECT_EQ(guardAfter[i], (SQLWCHAR)0xBEEF) << "guardAfter[" << i << "]"; + } + + // State still decodes to 5 characters. + EXPECT_EQ(SqlWcharLen(sqlState), 5u) + << "State decoded as: '" << FromSqlWchar(sqlState) << "'"; +} + +// Legacy ODBC 2.x SQLErrorW — same underlying ConvertingString(12, sqlState) +// shape. Kept so the fix also covers that call site. +TEST_F(WideErrorsTest, ErrorW_SqlState) { + SKIP_ON_LINUX_WIDECHAR(); + + SQLRETURN ret = SQLExecDirect(hStmt, + (SQLCHAR*)"SELECT doesnotexist FROM RDB$DATABASE", SQL_NTS); + ASSERT_FALSE(SQL_SUCCEEDED(ret)); + + SQLWCHAR sqlState[6] = {}; + SQLINTEGER nativeError = 0; + SQLWCHAR messageBuf[512] = {}; + SQLSMALLINT messageLen = 0; + + ret = SQLErrorW(hEnv, hDbc, hStmt, + sqlState, &nativeError, + messageBuf, 512, &messageLen); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + + EXPECT_EQ(SqlWcharLen(sqlState), 5u) + << "State decoded as: '" << FromSqlWchar(sqlState) << "'"; +} + +// ============================================================================ +// (b) Input path — SQLExecDirectW / SQLPrepareW +// ============================================================================ + +class WideExecTest : public OdbcConnectedTest {}; + +// Pass an ASCII query as SQLWCHAR with SQL_NTS — exercises the convUnicodeToString +// path's sqlwcharLen-equivalent branch (currently wcslen on a SQLWCHAR pointer, +// which is wrong on Linux). +TEST_F(WideExecTest, ExecDirectW_Ascii_Nts) { + SKIP_ON_LINUX_WIDECHAR(); + + auto query = ToSqlWchar("SELECT 1 FROM RDB$DATABASE"); + SQLRETURN ret = SQLExecDirectW(hStmt, query.data(), SQL_NTS); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLExecDirectW failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + SQLINTEGER val = 0; + SQLLEN ind = 0; + SQLBindCol(hStmt, 1, SQL_C_SLONG, &val, 0, &ind); + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + EXPECT_EQ(val, 1); +} + +// Same but with an explicit SQLWCHAR-unit length — exercises the other branch +// of convUnicodeToString where we temporarily NUL-terminate input. +TEST_F(WideExecTest, ExecDirectW_Ascii_ExplicitLength) { + SKIP_ON_LINUX_WIDECHAR(); + + auto query = ToSqlWchar("SELECT 1 FROM RDB$DATABASE"); + // Length is in characters (SQLWCHAR units), per the spec. + SQLINTEGER len = (SQLINTEGER)(query.size() - 1); // exclude trailing NUL + + SQLRETURN ret = SQLExecDirectW(hStmt, query.data(), len); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLExecDirectW(len=" << len << ") failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + SQLINTEGER val = 0; + SQLLEN ind = 0; + SQLBindCol(hStmt, 1, SQL_C_SLONG, &val, 0, &ind); + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + EXPECT_EQ(val, 1); +} + +// SQLPrepareW twin — same input path as ExecDirectW. +TEST_F(WideExecTest, PrepareW_Ascii_Nts) { + SKIP_ON_LINUX_WIDECHAR(); + + auto query = ToSqlWchar("SELECT 1 FROM RDB$DATABASE"); + SQLRETURN ret = SQLPrepareW(hStmt, query.data(), SQL_NTS); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLPrepareW failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + ret = SQLExecute(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + + SQLINTEGER val = 0; + SQLLEN ind = 0; + SQLBindCol(hStmt, 1, SQL_C_SLONG, &val, 0, &ind); + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); + EXPECT_EQ(val, 1); +} + +// A query long enough that the narrow byteString allocation has to grow. +// If the length accounting is wrong in SQLWCHAR vs wchar_t units, this is +// where truncation/overflow bugs surface. +TEST_F(WideExecTest, ExecDirectW_Ascii_LongQuery) { + SKIP_ON_LINUX_WIDECHAR(); + + // 200+ byte query so the byteString buffer is a meaningful size. + std::string q = "SELECT "; + for (int i = 0; i < 40; ++i) q += "1+"; + q += "0 FROM RDB$DATABASE"; + + auto query = ToSqlWchar(q.c_str()); + SQLRETURN ret = SQLExecDirectW(hStmt, query.data(), SQL_NTS); + ASSERT_TRUE(SQL_SUCCEEDED(ret)) + << "SQLExecDirectW(long) failed: " + << GetOdbcError(SQL_HANDLE_STMT, hStmt); + + ret = SQLFetch(hStmt); + ASSERT_TRUE(SQL_SUCCEEDED(ret)); +}