From dd347ac269722f97975d0d557d7f4abbafca2fec Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 10:19:39 +0800 Subject: [PATCH 1/7] Fix deterministic memory leak and dangling pointer in SQLParser::tokenize --- src/SQLParser.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index b3bf0dfe..5334b9fe 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -59,11 +59,16 @@ bool SQLParser::tokenize(const std::string& sql, std::vector<int16_t>* tokens) { int16_t token = hsql_lex(&yylval, &yylloc, scanner); while (token != 0) { tokens->push_back(token); - token = hsql_lex(&yylval, &yylloc, scanner); - + if (token == SQL_IDENTIFIER || token == SQL_STRING) { free(yylval.sval); + yylval.sval = nullptr; + } + + token = hsql_lex(&yylval, &yylloc, scanner); + + } hsql__delete_buffer(state, scanner); From 2b254f355652de187c312f984c4964eb05f53855 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 16:20:19 +0800 Subject: [PATCH 2/7] Add regression test for tokenize memory leak --- test/sql_parser.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/sql_parser.cpp b/test/sql_parser.cpp index 31b9be1f..c94f74c8 100644 --- a/test/sql_parser.cpp +++ b/test/sql_parser.cpp @@ -42,3 +42,17 @@ TEST(SQLParserTokenizeStringifyTest) { ASSERT(query == cache[token_string]); ASSERT(&query != &cache[token_string]); } + +TEST(SQLParserTokenizeLeakRegressionTest) { + + const std::string query = "'string_1' 'string_2' 'string_3';"; + std::vector<int16_t> tokens; + + ASSERT(SQLParser::tokenize(query, &tokens)); + + ASSERT_EQ(tokens.size(), 4); + ASSERT_EQ(tokens[0], SQL_STRING); + ASSERT_EQ(tokens[1], SQL_STRING); + ASSERT_EQ(tokens[2], SQL_STRING); + ASSERT_EQ(tokens[3], ';'); +} \ No newline at end of file From 571dc227c7ee23fd13b13d6a184b302706dda40c Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 20:21:53 +0800 Subject: [PATCH
3/7] update CI with Clang sanitizer builds --- .github/workflows/ci.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d9122e3b..c505d6d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,19 @@ jobs: cxx: clang++ os: macos-latest + - name: clang-sanitizer-ubuntu + cc: clang-19 + cxx: clang++-19 + os: ubuntu-latest + container: ubuntu:24.04 + build_options: "CXXFLAGS='-fsanitize=address,undefined -g' LDFLAGS='-fsanitize=address,undefined'" + + - name: clang-sanitizer-macOS + cc: clang + cxx: clang++ + os: macos-latest + build_options: "CXXFLAGS='-fsanitize=address,undefined -g' LDFLAGS='-fsanitize=address,undefined'" + steps: - name: Checkout uses: actions/checkout@v4 From 14c56213ecd5eef2efde6e5f6ca6e137882efe81 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Tue, 31 Mar 2026 21:51:19 +0800 Subject: [PATCH 4/7] Fix CI OS detection --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c505d6d1..c52e9ba5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,14 +80,14 @@ jobs: git checkout $GITHUB_HEAD_REF - name: Setup (macOS) - if: matrix.name == 'clang-macOS' + if: matrix.os == 'macos-latest' run: | brew install bison flex echo "BISON=$(brew --prefix bison)/bin/bison" >> $GITHUB_ENV echo "FLEX=$(brew --prefix flex)/bin/flex" >> $GITHUB_ENV - name: Setup (Ubuntu) - if: matrix.name != 'clang-macOS' + if: matrix.os == 'ubuntu-latest' run: | apt-get update apt-get install --no-install-recommends -y bison flex ${CC} ${CXX} make valgrind From 23e894df727ab4d37a71a55814bf14748672c357 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Wed, 1 Apr 2026 09:22:14 +0800 Subject: [PATCH 5/7] Temporarily remove fix to verify sanitizer failure --- .github/workflows/ci.yml | 2 +- src/SQLParser.cpp | 11 +++-------- 2 files 
changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c52e9ba5..37e52eee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 if: matrix.name != 'gcc-6' - name: Checkout (Ubuntu 18.04) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index 5334b9fe..22459e85 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -59,16 +59,11 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { int16_t token = hsql_lex(&yylval, &yylloc, scanner); while (token != 0) { tokens->push_back(token); - + token = hsql_lex(&yylval, &yylloc, scanner); + if (token == SQL_IDENTIFIER || token == SQL_STRING) { free(yylval.sval); - yylval.sval = nullptr; - } - - token = hsql_lex(&yylval, &yylloc, scanner); - - } hsql__delete_buffer(state, scanner); @@ -76,4 +71,4 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { return true; } -} // namespace hsql +} // namespace hsql \ No newline at end of file From 20603d2ce09176004a1f40ebc3497cedb10f2d91 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Wed, 1 Apr 2026 14:21:32 +0800 Subject: [PATCH 6/7] Fix newline EOF and keep fix removed for verification --- src/SQLParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index 22459e85..b3bf0dfe 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -71,4 +71,4 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { return true; } -} // namespace hsql \ No newline at end of file +} // namespace hsql From 79f3c33ff55bd82473aecda058e11b4ab6704f67 Mon Sep 17 00:00:00 2001 From: RageLiu <2273015469@qq.com> Date: Wed, 1 Apr 2026 15:34:23 +0800 Subject: [PATCH 7/7] Re-apply the fix: Tests now pass and memory leaks are resolved --- src/SQLParser.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/src/SQLParser.cpp b/src/SQLParser.cpp index b3bf0dfe..c4806b69 100644 --- a/src/SQLParser.cpp +++ b/src/SQLParser.cpp @@ -59,11 +59,13 @@ bool SQLParser::tokenize(const std::string& sql, std::vector* tokens) { int16_t token = hsql_lex(&yylval, &yylloc, scanner); while (token != 0) { tokens->push_back(token); - token = hsql_lex(&yylval, &yylloc, scanner); if (token == SQL_IDENTIFIER || token == SQL_STRING) { free(yylval.sval); + yylval.sval = nullptr; } + token = hsql_lex(&yylval, &yylloc, scanner); + } hsql__delete_buffer(state, scanner);