Skip to content

Commit 9e5141a

Browse files
sqlparser: replace nested tokenizer with inline versioned comment scanning (#19725)
Signed-off-by: Arthur Schreiber <arthur@planetscale.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9a3646f commit 9e5141a

7 files changed

Lines changed: 325 additions & 250 deletions

File tree

go/vt/sqlparser/AGENTS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# SQL Parser
2+
3+
## Tokenizer
4+
5+
The tokenizer (`token.go`) processes SQL input character-by-character. When
6+
modifying scan methods, watch out for **recursion and stack overflow**. Scan
7+
helper methods must not call back into `Scan()`. Crafted input with many
8+
consecutive constructs can cause unbounded stack growth. Instead, have helpers
9+
advance scanner state and return to `Scan`'s main `for` loop via `continue`.

go/vt/sqlparser/ast_funcs_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,100 @@ func TestKeyspaceToNonQualifiedTable(t *testing.T) {
254254
AddKeyspace(stmt, "ks2")
255255
require.Equal(t, "select col, col + (select 1 from ks2.t4) from ks.t join ks2.t2 join (select 1 from ks2.t3) as x where t.id = t2.id and x.id = t.id", String(stmt))
256256
}
257+
258+
// TestVersionedCommentParsing tests MySQL versioned comment handling.
259+
// All expectations validated against MySQL 8.4.
260+
func TestVersionedCommentParsing(t *testing.T) {
261+
parser, err := New(Options{MySQLServerVersion: "8.1.20"})
262+
require.NoError(t, err)
263+
264+
tests := []struct {
265+
name string
266+
query string
267+
expected string // expected formatted output after parse
268+
isError bool
269+
errMsg string // if isError, the expected error message
270+
}{
271+
{
272+
name: "expanded (version matches)",
273+
query: "SELECT /*!80100 JSON_VALUE(col, '$.x') AS jval, */ id FROM t",
274+
expected: "select json_value(col, '$.x') as jval, id from t",
275+
},
276+
{
277+
name: "discarded (version too new)",
278+
query: "SELECT /*!90000 UPPER('hidden'), */ 42",
279+
expected: "select 42 from dual",
280+
},
281+
{
282+
name: "no version number (always expanded)",
283+
query: "SELECT /*! 1 + 1 */ FROM dual",
284+
expected: "select 1 + 1 from dual",
285+
},
286+
{
287+
name: "nested regular comment inside version comment",
288+
query: "SELECT /*!80100 1 /* a comment */ + 2 */",
289+
expected: "select 1 + 2 from dual",
290+
},
291+
{
292+
name: "nested version comment inside version comment treated as regular comment",
293+
query: "SELECT /*!80100 1 /*!99999 noise */ + 2 */",
294+
expected: "select 1 + 2 from dual",
295+
},
296+
{
297+
name: "unclosed version comment",
298+
query: "SELECT /*!80100 1 + 2",
299+
isError: true,
300+
errMsg: "syntax error at position 22",
301+
},
302+
{
303+
name: "version number without whitespace after digits",
304+
query: "SELECT 1 + /*!801002*/",
305+
expected: "select 1 + 2 from dual",
306+
},
307+
{
308+
name: "empty version comment",
309+
query: "SELECT 1 /*!80100 */",
310+
expected: "select 1 from dual",
311+
},
312+
{
313+
name: "fewer than 5 version digits are treated as content",
314+
query: "SELECT /*!8010*/ FROM dual",
315+
expected: "select 8010 from dual",
316+
},
317+
{
318+
name: "non-digits after /*! are treated as content",
319+
query: "SELECT /*! 1 + 2*/ FROM dual",
320+
expected: "select 1 + 2 from dual",
321+
},
322+
{
323+
name: "no space before closing */",
324+
query: "SELECT /*!80100 42*/ FROM dual",
325+
expected: "select 42 from dual",
326+
},
327+
{
328+
name: "nested comment inside skipped version comment",
329+
query: "SELECT /*!90000 1 /* nested */ + 2 */ 42",
330+
expected: "select 42 from dual",
331+
},
332+
{
333+
name: "nested version comment inside skipped version comment",
334+
query: "SELECT /*!90000 1 /*!99999 nested */ + 2 */ 42",
335+
expected: "select 42 from dual",
336+
},
337+
}
338+
339+
for _, tt := range tests {
340+
t.Run(tt.name, func(t *testing.T) {
341+
stmt, err := parser.Parse(tt.query)
342+
if tt.isError {
343+
require.Error(t, err, "expected parse error for: %s", tt.query)
344+
if tt.errMsg != "" {
345+
assert.EqualError(t, err, tt.errMsg)
346+
}
347+
return
348+
}
349+
require.NoError(t, err, "unexpected error for: %s", tt.query)
350+
assert.Equal(t, tt.expected, String(stmt))
351+
})
352+
}
353+
}

go/vt/sqlparser/comments.go

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -199,28 +199,6 @@ func hasCommentPrefix(sql string) bool {
199199
return len(sql) > 1 && ((sql[0] == '/' && sql[1] == '*') || (sql[0] == '-' && sql[1] == '-'))
200200
}
201201

202-
// ExtractMysqlComment extracts the version and SQL from a comment-only query
203-
// such as /*!50708 sql here */
204-
func ExtractMysqlComment(sql string) (string, string) {
205-
sql = sql[3 : len(sql)-2]
206-
207-
digitCount := 0
208-
endOfVersionIndex := strings.IndexFunc(sql, func(c rune) bool {
209-
digitCount++
210-
return !unicode.IsDigit(c) || digitCount == 6
211-
})
212-
if endOfVersionIndex < 0 {
213-
return "", ""
214-
}
215-
if endOfVersionIndex < 5 {
216-
endOfVersionIndex = 0
217-
}
218-
version := sql[0:endOfVersionIndex]
219-
innerSQL := strings.TrimFunc(sql[endOfVersionIndex:], unicode.IsSpace)
220-
221-
return version, innerSQL
222-
}
223-
224202
const commentDirectivePreamble = "/*vt+"
225203

226204
// CommentDirectives is the parsed representation for execution directives

go/vt/sqlparser/comments_test.go

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -222,36 +222,6 @@ a`,
222222
}
223223
}
224224

225-
func TestExtractMysqlComment(t *testing.T) {
226-
testCases := []struct {
227-
input, outSQL, outVersion string
228-
}{{
229-
input: "/*!50708SET max_execution_time=5000 */",
230-
outSQL: "SET max_execution_time=5000",
231-
outVersion: "50708",
232-
}, {
233-
input: "/*!50708 SET max_execution_time=5000*/",
234-
outSQL: "SET max_execution_time=5000",
235-
outVersion: "50708",
236-
}, {
237-
input: "/*!50708* from*/",
238-
outSQL: "* from",
239-
outVersion: "50708",
240-
}, {
241-
input: "/*! SET max_execution_time=5000*/",
242-
outSQL: "SET max_execution_time=5000",
243-
outVersion: "",
244-
}}
245-
for _, testCase := range testCases {
246-
gotVersion, gotSQL := ExtractMysqlComment(testCase.input)
247-
assert.Equal(t, testCase.outVersion, gotVersion, "version mismatch")
248-
249-
if gotSQL != testCase.outSQL {
250-
t.Errorf("test input: '%s', got SQL\n%+v, want\n%+v", testCase.input, gotSQL, testCase.outSQL)
251-
}
252-
}
253-
}
254-
255225
func TestExtractCommentDirectives(t *testing.T) {
256226
testCases := []struct {
257227
input string

go/vt/sqlparser/testdata/select_cases.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5972,7 +5972,7 @@ INPUT
59725972
select 1 + /*!00000 2 node_modules/ + 3 /*!99999 noise*/ + 4;
59735973
END
59745974
ERROR
5975-
syntax error at position 57
5975+
syntax error at position 36
59765976
END
59775977
INPUT
59785978
select 1, st_touches(t.geom, p.geom) from tbl_polygon t, tbl_polygon p where t.id = 'POLY1' and p.id = 'POLY2';
@@ -20251,8 +20251,8 @@ END
2025120251
INPUT
2025220252
select 1/*!2*/;
2025320253
END
20254-
OUTPUT
20255-
select 1 from dual
20254+
ERROR
20255+
syntax error at position 13 near '2'
2025620256
END
2025720257
INPUT
2025820258
select a, t1.* as 'with_alias', b from t1;
@@ -22238,7 +22238,7 @@ INPUT
2223822238
select 1/*!000002*/;
2223922239
END
2224022240
ERROR
22241-
syntax error at position 20 near '2'
22241+
syntax error at position 18 near '2'
2224222242
END
2224322243
INPUT
2224422244
select 1, min(a) from t1i where a=99;

0 commit comments

Comments
 (0)