Skip to content

Commit 779ed6e

Browse files
committed
Add SQLite multi-query parser
Implement SqliteMultiQueryParser with support for all three SQLite identifier quoting styles (double quotes, backticks, brackets), single-quoted strings with doubled-quote escaping, non-nesting block comments, and BEGIN...END compound statements for CREATE TRIGGER while correctly treating BEGIN TRANSACTION as a simple statement.
1 parent 97d4ae0 commit 779ed6e

3 files changed

Lines changed: 559 additions & 0 deletions

File tree

src/SqliteMultiQueryParser.php

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace Nextras\MultiQueryParser;
4+
5+
use Iterator;
6+
7+
8+
/**
 * Splits SQLite SQL text into individual statements.
 *
 * The pattern understands single-quoted strings, the three identifier quoting
 * styles ("...", `...`, [...]), block and line comments, and BEGIN ... END
 * compound bodies (as used by CREATE TRIGGER). A BEGIN that is followed by
 * TRANSACTION, DEFERRED, IMMEDIATE, EXCLUSIVE, a semicolon or the end of the
 * subject is treated as part of a simple statement instead.
 */
class SqliteMultiQueryParser extends BaseMultiQueryParser
{
	/**
	 * Yields every non-empty statement matched in the stream.
	 *
	 * The yielded text is the content of the "query" capture group, which does
	 * not include the terminating semicolon of the statement.
	 *
	 * @param Iterator $stream source handed unchanged to PatternIterator (presumably chunks of SQL text)
	 * @return Iterator generator of statement strings
	 */
	public function parseStringStream(Iterator $stream): Iterator
	{
		$matches = new PatternIterator($stream, $this->getQueryPattern());

		foreach ($matches as $match) {
			// Matches that consist only of whitespace/comments or a stray ";" carry no query text.
			$query = $match['query'] ?? '';
			if ($query !== '') {
				yield $query;
			}
		}
	}


	/**
	 * Builds the anchored PCRE pattern that matches one statement at a time.
	 *
	 * Every construct that has an opening delimiter is followed by (*PRUNE), so
	 * once the opener has been seen the regex engine may not go back and re-read
	 * it as an ordinary character; an unterminated construct makes the whole
	 * match attempt fail instead. The skipped-prefix group is possessive, hence
	 * atomic, so failing after it does not trip the (*PRUNE) verbs inside it.
	 *
	 * @return string delimited pattern using the x, s, A and S modifiers
	 */
	private function getQueryPattern(): string
	{
		// Whitespace and comments that may precede a statement (or the END keyword).
		$skipped = /** @lang PhpRegExp */ '
			(?:
				\s
				| /\* (*PRUNE) (?: [^*]++ | \*(?!/) )*+ \*/
				| -- [^\n]*+
			)*+
		';

		// One semicolon-terminated statement inside a BEGIN ... END body.
		$simpleQuery = $skipped . /** @lang PhpRegExp */ '
			(?<simplequery>
				(?:
					[^;\'"`[/-]++
					| \' (*PRUNE) (?: \'\' | [^\'] )*+ \'
					| " (*PRUNE) (?: "" | [^"] )*+ "
					| ` (*PRUNE) (?: `` | [^`] )*+ `
					| \[ (*PRUNE) [^\]]*+ (?: \]\] [^\]]*+ )* \]
					| /\* (*PRUNE) (?: [^*]++ | \*(?!/) )*+ \*/
					| -- [^\n]*+
					| (?!;) .
				)++
			)
			;
		';

		// BEGIN and END are matched as whole words only, so identifiers such as
		// "begin_date" or "beginning" do not open a compound body. END may be
		// preceded by comments as well as whitespace; otherwise "-- note\nEND;"
		// would be consumed as an inner statement and the body would never close.
		$compoundBody = /** @lang PhpRegExp */ '
			\b (?i:BEGIN) \b (?!\s*(?:(?i:TRANSACTION|DEFERRED|IMMEDIATE|EXCLUSIVE)\b|;|\z)) (*PRUNE)
			(?: ' . $skipped . ' (?i:END) \b \s* | ' . $simpleQuery . ' )*
		';

		return /** @lang PhpRegExp */ '~' . $skipped . '
			(?:
				(?:
					(?<query>
						(?:
							[^bB;\'"`[/-]++
							| \' (*PRUNE) (?: \'\' | [^\'] )*+ \'
							| " (*PRUNE) (?: "" | [^"] )*+ "
							| ` (*PRUNE) (?: `` | [^`] )*+ `
							| \[ (*PRUNE) [^\]]*+ (?: \]\] [^\]]*+ )* \]
							| /\* (*PRUNE) (?: [^*]++ | \*(?!/) )*+ \*/
							| ' . $compoundBody . '
							| -- [^\n]*+
							| (?!;) .
						)*+
					)
					(?: ; | \z )
				)
				|
				(?:
					\z
				)
			)
		~xsAS';
	}
}
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
<?php declare(strict_types = 1);
2+
3+
/**
4+
* @testCase
5+
*/
6+
7+
namespace Nextras\MultiQueryParser;
8+
9+
use Tester\Assert;
10+
11+
12+
require_once __DIR__ . '/../bootstrap.php';
13+
require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php';
14+
15+
16+
class SqliteMultiQueryParserTest extends MultiQueryParserTestCase
17+
{
18+
/**
 * Creates the parser instance exercised by this test case.
 */
protected function createParser(): IMultiQueryParser
{
	$parser = new SqliteMultiQueryParser();

	return $parser;
}
22+
23+
24+
/**
 * Returns the path of the SQLite fixture file, built relative to this test's directory.
 */
protected function getDataFilePath(): string
{
	$fixturePath = __DIR__ . '/../data/sqlite.sql';

	return $fixturePath;
}
28+
29+
30+
/**
 * Returns the number of queries testFile() expects the fixture file to produce.
 */
protected function getExpectedFileQueryCount(): int
{
	$expectedCount = 62;

	return $expectedCount;
}
34+
35+
36+
/**
 * Parses the SQLite fixture file and checks the number of queries returned
 * as well as the exact text of the query stored under key 19.
 */
public function testFile(): void
37+
{
38+
$parser = $this->createParser();
39+
// Collect everything parseFile() produces into an array so it can be counted and indexed.
$queries = iterator_to_array($parser->parseFile($this->getDataFilePath()));
40+
Assert::count($this->getExpectedFileQueryCount(), $queries);
41+
// The expected trigger text keeps the semicolon inside BEGIN ... END and has no semicolon after END.
Assert::same("CREATE TRIGGER trigger_book_collections_update
42+
AFTER UPDATE ON book_collections
43+
FOR EACH ROW
44+
BEGIN
45+
UPDATE book_collections SET updated_at = datetime('now') WHERE id = NEW.id;
46+
END", $queries[19]);
47+
}
48+
49+
50+
/**
 * Inputs with redundant semicolons, each paired with the queries expected from it.
 *
 * @return list<array{string, list<string>}>
 */
protected function provideSuperfluousSemicolonsData(): array
{
	$cases = [
		// trailing run of semicolons
		['SELECT 1 AS semicolon_madness;;;', ['SELECT 1 AS semicolon_madness']],

		// nothing but semicolons
		[';;', []],
		[';;;', []],

		// leading semicolon
		[';SELECT 1;', ['SELECT 1']],

		// empty statements between two queries
		['SELECT 1;;SELECT 2;', ['SELECT 1', 'SELECT 2']],
		['SELECT 1; ; SELECT 2;', ['SELECT 1', 'SELECT 2']],
	];

	return $cases;
}
82+
83+
84+
/**
 * Edge-case inputs, each paired with the list of queries expected from it.
 *
 * @return list<array{string, list<string>}>
 */
protected function provideEdgeCasesData(): array
{
	$cases = [
		// --- no statements at all: empty and whitespace-only input
		['', []],
		[" \n\t\n ", []],

		// --- semicolons inside single-quoted strings do not split
		["SELECT 'a;b';", ["SELECT 'a;b'"]],
		["SELECT ';;;';", ["SELECT ';;;'"]],
		["SELECT '';", ["SELECT ''"]],

		// --- '' is an escaped quote inside a string
		["SELECT 'it''s';", ["SELECT 'it''s'"]],

		// --- "identifier" quoting, with semicolon and with doubled quote
		['SELECT "col;name" FROM t;', ['SELECT "col;name" FROM t']],
		['SELECT "col""name" FROM t;', ['SELECT "col""name" FROM t']],

		// --- `identifier` quoting, with semicolon and with doubled backtick
		['SELECT `col;name` FROM t;', ['SELECT `col;name` FROM t']],
		['SELECT `col``name` FROM t;', ['SELECT `col``name` FROM t']],

		// --- [identifier] quoting, with semicolons
		['SELECT [col;name] FROM t;', ['SELECT [col;name] FROM t']],
		['SELECT [a;b], [c;d] FROM t;', ['SELECT [a;b], [c;d] FROM t']],

		// --- doubled ] inside a bracket identifier
		['SELECT [col]]name] FROM t;', ['SELECT [col]]name] FROM t']],

		// --- semicolons inside comments are not delimiters
		["SELECT /* ; */ 1;", ["SELECT /* ; */ 1"]],
		["SELECT 1; -- has ; in comment\nSELECT 2;", ["SELECT 1", "SELECT 2"]],

		// --- a line comment in the middle of a query stays part of it
		["SELECT 1 -- comment with ;\nSELECT 2;", ["SELECT 1 -- comment with ;\nSELECT 2"]],

		// --- last query has no terminating semicolon
		["SELECT 1", ["SELECT 1"]],
		["SELECT 1; SELECT 2", ["SELECT 1", "SELECT 2"]],

		// --- a lone "/" or "-" does not open a comment
		["SELECT 5/3;", ["SELECT 5/3"]],
		["SELECT 5-3;", ["SELECT 5-3"]],

		// --- input made of a comment only
		["/* only a comment */", []],
		["-- only a comment", []],

		// --- comments before and between queries are dropped
		["/* prefix */ SELECT 1;", ["SELECT 1"]],
		["-- prefix\nSELECT 1;", ["SELECT 1"]],
		["SELECT 1; /* between */ SELECT 2;", ["SELECT 1", "SELECT 2"]],

		// --- a star inside a block comment
		["SELECT /* contains * star */ 1;", ["SELECT /* contains * star */ 1"]],

		// --- block comments do not nest (SQLite does NOT support nesting)
		["SELECT /* outer /* inner */ 1;", ["SELECT /* outer /* inner */ 1"]],

		// --- CRLF line endings
		["SELECT 1;\r\nSELECT 2;\r\n", ["SELECT 1", "SELECT 2"]],

		// --- BEGIN...END keeps its inner semicolons and is one query
		[
			"BEGIN\n\tSELECT 1;\n\tSELECT 2;\nEND;",
			["BEGIN\n\tSELECT 1;\n\tSELECT 2;\nEND"],
		],

		// --- BEGIN...END with an empty body
		["BEGIN END;", ["BEGIN END"]],
		["BEGIN\nEND;", ["BEGIN\nEND"]],

		// --- the word BEGIN inside quotes must not start a block
		["SELECT 'BEGIN';", ["SELECT 'BEGIN'"]],
		['SELECT "BEGIN";', ['SELECT "BEGIN"']],

		// --- two consecutive BEGIN...END blocks are two queries
		[
			"BEGIN\n\tSELECT 1;\nEND;\nBEGIN\n\tSELECT 2;\nEND;",
			["BEGIN\n\tSELECT 1;\nEND", "BEGIN\n\tSELECT 2;\nEND"],
		],

		// --- string with a semicolon inside a BEGIN...END block
		[
			"BEGIN\n\tSELECT 'a;b';\nEND;",
			["BEGIN\n\tSELECT 'a;b';\nEND"],
		],

		// --- BEGIN...END following a CREATE TRIGGER header
		[
			"CREATE TRIGGER t AFTER INSERT ON x FOR EACH ROW\nBEGIN\n\tSELECT 1;\nEND;",
			["CREATE TRIGGER t AFTER INSERT ON x FOR EACH ROW\nBEGIN\n\tSELECT 1;\nEND"],
		],

		// --- transaction forms of BEGIN are simple statements
		["BEGIN TRANSACTION; SELECT 1;", ["BEGIN TRANSACTION", "SELECT 1"]],
		["BEGIN DEFERRED; SELECT 1;", ["BEGIN DEFERRED", "SELECT 1"]],
		["BEGIN IMMEDIATE; SELECT 1;", ["BEGIN IMMEDIATE", "SELECT 1"]],
		["BEGIN EXCLUSIVE; SELECT 1;", ["BEGIN EXCLUSIVE", "SELECT 1"]],

		// --- bare "BEGIN;" is a simple statement too
		["BEGIN; SELECT 1;", ["BEGIN", "SELECT 1"]],

		// --- all three identifier styles in one query
		['SELECT [a], "b", `c` FROM t;', ['SELECT [a], "b", `c` FROM t']],
	];

	return $cases;
}
199+
200+
201+
/**
 * Inputs split into several chunks, each paired with the queries expected
 * once the chunks have been parsed as one stream.
 *
 * @return list<array{list<string>, list<string>}>
 */
protected function provideChunkBoundaryData(): array
{
	$cases = [
		// split inside a single-quoted string
		[
			["SELECT 'a;b", "c';"],
			["SELECT 'a;bc'"],
		],

		// split inside a double-quoted identifier
		[
			['SELECT "a;b', 'c";'],
			['SELECT "a;bc"'],
		],

		// split inside a backtick identifier
		[
			['SELECT `a;b', 'c`;'],
			['SELECT `a;bc`'],
		],

		// split inside a bracket identifier
		[
			["SELECT [col;na", "me] FROM t;"],
			["SELECT [col;name] FROM t"],
		],

		// split inside a block comment within a query
		[
			["SELECT /* a;b", "c */ 1;"],
			["SELECT /* a;bc */ 1"],
		],

		// split inside a block comment that precedes the query
		[
			["/* x;y", "z */ SELECT 1;"],
			["SELECT 1"],
		],

		// split inside a string within a BEGIN...END block
		[
			["BEGIN\n\tSELECT 'a;b", "c';\nEND;"],
			["BEGIN\n\tSELECT 'a;bc';\nEND"],
		],
	];

	return $cases;
}
244+
}
245+
246+
247+
// Instantiate the test case and call its run() method when this file is executed.
(new SqliteMultiQueryParserTest())->run();

0 commit comments

Comments
 (0)