1616 */
1717class Tokenizer
1818{
/** @var int position in $input at which the next match is attempted */
protected int $offset = 0;

/** @var string the text being scanned */
protected string $input;

/**
 * @var string regular expression body (without delimiters) that marks the
 *             end of a statement; replaced when a DELIMITER command is read
 */
protected string $delimiter_pattern = ";";
3324
3425 /**
3526 * @param string $input
3627 *
3728 * @return array tree-structure of SQL tokens
3829 */
39- public static function tokenize (string $ input )
30+ public static function tokenize (string $ input ): array
4031 {
4132 $ parser = new self ($ input );
4233
@@ -51,12 +42,14 @@ protected function __construct(string $input)
5142 /**
5243 * @return string[]
5344 */
54- protected function statements ()
45+ protected function statements (): array
5546 {
5647 $ statements = [];
5748
58- while ($ result = $ this ->statement ()) {
49+ $ result = $ this ->statement ();
50+ while ($ result ) {
5951 $ statements [] = $ result ;
52+ $ result = $ this ->statement ();
6053 }
6154
6255 return $ statements ;
@@ -65,7 +58,7 @@ protected function statements()
6558 /**
6659 * @return string[]|null
6760 */
68- protected function statement ()
61+ protected function statement (): ? array
6962 {
7063 $ this ->consume ('\s* ' );
7164
@@ -74,9 +67,11 @@ protected function statement()
7467 }
7568
7669 $ tokens = [];
77-
78- while ("" !== $ token = $ this -> token () ) {
70+ $ token = $ this -> token ();
71+ while ($ token !== "" ) {
7972 if (is_string ($ token ) && preg_match ('/^delimiter$/i ' , $ token ) === 1 ) {
73+ // Omit DELIMITER command - it isn't part of SQL statement syntax
74+
8075 $ this ->consume ('[ ]* ' );
8176
8277 $ delimiter = trim ($ this ->consume ('.*?[\r\n]+ ' ));
@@ -87,61 +82,73 @@ protected function statement()
8782
8883 $ this ->delimiter_pattern = preg_quote ($ delimiter );
8984
90- continue ; // omits DELIMITER command - it isn't part of SQL statement syntax
85+ } else {
86+ $ tokens [] = $ token ;
9187 }
92-
93- $ tokens [] = $ token ;
88+ $ token = $ this ->token ();
9489 }
9590
9691 return $ tokens ;
9792 }
9893
9994 /**
95+ * TODO: Refactor this - cyclomatic complexity > 10
96+ *
10097 * @return array|string
10198 */
102- protected function token ()
99+ protected function token (): array | string
103100 {
104101 if ($ this ->consume ($ this ->delimiter_pattern )) {
105102 return "" ; // end of statement
106103 }
107104
108- if ("" !== $ token = $ this ->consume ('\w+ ' )) {
105+ $ token = $ this ->consume ('\w+ ' );
106+ if ($ token !== "" ) {
109107 return $ token ;
110108 }
111109
112- if ($ token = $ this ->consume ('\s+ ' )) {
110+ $ token = $ this ->consume ('\s+ ' );
111+ if ($ token ) {
113112 return $ token ;
114113 }
115114
116- if ($ token = $ this ->comment ()) {
115+ $ token = $ this ->comment ();
116+ if ($ token ) {
117117 return $ token ;
118118 }
119119
120- if ($ token = $ this ->consume ('\@\w+ ' )) {
120+ $ token = $ this ->consume ('\@\w+ ' );
121+ if ($ token ) {
121122 return $ token ; // @var
122123 }
123124
124- if ($ token = $ this ->consume (':\w+ ' )) {
125- return $ token ; // :var (PDO placeholder)
125+ $ token = $ this ->consume (':\w+ ' );
126+ if ($ token ) {
127+ return $ token ; // PDO placeholder
126128 }
127129
128- if ($ token = $ this ->consume ('[+\-\*\/.,!=^|&<>:@%~#]+ ' )) {
130+ $ token = $ this ->consume ('[+\-\*\/.,!=^|&<>:@%~#]+ ' );
131+ if ($ token ) {
129132 return $ token ; // various operators
130133 }
131134
132- if ($ token = $ this ->consume ('; ' )) {
135+ $ token = $ this ->consume ('; ' );
136+ if ($ token ) {
133137 return $ token ; // statement separator (when $delimiter_pattern has been modified)
134138 }
135139
136- if ($ token = $ this ->quoted ()) {
140+ $ token = $ this ->quoted ();
141+ if ($ token ) {
137142 return $ token ;
138143 }
139144
140- if ($ tokens = $ this ->grouped ()) {
145+ $ tokens = $ this ->grouped ();
146+ if ($ tokens ) {
141147 return $ tokens ;
142148 }
143149
144- if ($ token = $ this ->dollarquoted ()) {
150+ $ token = $ this ->dollarquoted ();
151+ if ($ token ) {
145152 return $ token ;
146153 }
147154
@@ -152,18 +159,17 @@ protected function token()
152159 $ this ->fail ("expected SQL token " );
153160 }
154161
155- /**
156- * @return string|null
157- */
158- protected function comment ()
162+ protected function comment (): ?string
159163 {
160- if ($ start = $ this ->consume ('-- ' )) {
164+ $ start = $ this ->consume ('-- ' );
165+ if ($ start ) {
161166 $ comment = $ this ->consume ("[^ \r\n]* " );
162167
163168 return "{$ start }{$ comment }" ;
164169 }
165170
166- if ($ start = $ this ->consume ('\/\* ' )) {
171+ $ start = $ this ->consume ('\/\* ' );
172+ if ($ start ) {
167173 $ comment = $ this ->consume ('.*?\*\/ ' );
168174
169175 if ($ comment ) {
@@ -176,12 +182,10 @@ protected function comment()
176182 return null ;
177183 }
178184
179- /**
180- * @return string|null
181- */
182- protected function dollarquoted ()
185+ protected function dollarquoted (): ?string
183186 {
184- if ($ delimiter = $ this ->consume ('\$\w*\$ ' )) {
187+ $ delimiter = $ this ->consume ('\$\w*\$ ' );
188+ if ($ delimiter ) {
185189 $ end_delimiter = preg_quote ($ delimiter );
186190
187191 $ body = $ this ->consume (".*? {$ end_delimiter }" );
@@ -198,18 +202,16 @@ protected function dollarquoted()
198202 return null ;
199203 }
200204
201- /**
202- * @return array|null
203- */
204- protected function grouped ()
205+ protected function grouped (): ?array
205206 {
206207 static $ end = [
207208 "( " => ") " ,
208209 "{ " => "} " ,
209210 "[ " => "] " ,
210211 ];
211212
212- if ($ opening = $ this ->consume ('[({\[] ' )) {
213+ $ opening = $ this ->consume ('[({\[] ' );
214+ if ($ opening ) {
213215 $ closing = $ end [$ opening ];
214216
215217 $ tokens = [$ opening ];
@@ -218,12 +220,13 @@ protected function grouped()
218220 if ($ this ->is ($ closing )) {
219221 $ tokens [] = $ closing ;
220222
221- $ this ->offset +=1 ;
223+ $ this ->offset += 1 ;
222224
223225 return $ tokens ;
224226 }
225227
226- if ("" !== $ token = $ this ->token ()) {
228+ $ token = $ this ->token ();
229+ if ($ token !== "" ) {
227230 $ tokens [] = $ token ;
228231 } else {
229232 $ this ->fail ("expected token or group end: {$ closing }" );
@@ -237,9 +240,11 @@ protected function grouped()
237240 /**
238241 * @return string|null
239242 */
240- protected function quoted ()
243+ protected function quoted (): ? string
241244 {
242- if ($ quote = $ this ->consume ('[` \'"] ' )) {
245+ $ quote = $ this ->consume ('[` \'"] ' );
246+
247+ if ($ quote ) {
243248 $ tokens = [$ quote ];
244249
245250 $ not_quote = '[^ ' . preg_quote ($ quote ) . '\\\\]* ' ;
@@ -261,7 +266,9 @@ protected function quoted()
261266 return implode ('' , $ tokens );
262267 }
263268
264- if ("" !== $ token = $ this ->consume ($ not_quote )) {
269+ $ token = $ this ->consume ($ not_quote );
270+
271+ if ($ token !== "" ) {
265272 $ tokens [] = $ token ;
266273
267274 continue ;
@@ -286,7 +293,7 @@ protected function is(string $exact): bool
286293
/**
 * Check whether the given pattern matches the input at the current offset.
 *
 * The pattern is wrapped as `/.../sA`: the `s` modifier lets `.` match
 * newlines, and `A` anchors the match at the offset handed to preg_match().
 *
 * @param string $pattern regular expression body (without delimiters)
 *
 * @return bool true only when preg_match() returns exactly 1
 */
protected function matches(string $pattern): bool
{
    $regex = "/{$pattern}/sA";

    // The captured groups are not needed here, so only the subject and offset are passed.
    $result = preg_match(pattern: $regex, subject: $this->input, offset: $this->offset);

    return $result === 1;
}
291298
292299 protected function consume (string $ pattern ): string
@@ -300,8 +307,9 @@ protected function consume(string $pattern): string
300307 return '' ;
301308 }
302309
/**
 * Abort by throwing an exception that reports the reason, the current
 * offset, and the single character found at that offset.
 *
 * @param string $why description of what was expected
 *
 * @throws RuntimeException always
 */
protected function fail(string $why): void
{
    // The one character at the current offset, shown to the user in quotes.
    $got = substr($this->input, $this->offset, 1);

    $message = "unexpected input: {$why}, at: {$this->offset}, got: \"" . $got . "\"";

    throw new RuntimeException($message);
}
307315}
0 commit comments