1616 */
1717class Tokenizer
1818{
19- /**
20- * @var int
21- */
22- protected $ offset = 0 ;
19+ protected int $ offset = 0 ;
2320
24- /**
25- * @var string
26- */
27- protected $ input ;
21+ protected string $ input ;
2822
29- /**
30- * @var string
31- */
32- protected $ delimiter_pattern = "; " ;
23+ protected string $ delimiter_pattern = "; " ;
3324
3425 /**
3526 * @param string $input
3627 *
3728 * @return array tree-structure of SQL tokens
3829 */
39- public static function tokenize (string $ input )
30+ public static function tokenize (string $ input ): array
4031 {
4132 $ parser = new self ($ input );
4233
@@ -51,7 +42,7 @@ protected function __construct(string $input)
5142 /**
5243 * @return string[]
5344 */
54- protected function statements ()
45+ protected function statements (): array
5546 {
5647 $ statements = [];
5748
@@ -62,6 +53,7 @@ protected function statements()
6253 }
6354 } while ($ result !== null );
6455
56+
6557 return $ statements ;
6658 }
6759
@@ -74,7 +66,7 @@ protected function statements()
7466 *
7567 * @return string[]|null
7668 */
77- protected function statement ()
69+ protected function statement (): ? array
7870 {
7971 $ this ->consume ('\s* ' );
8072
@@ -84,12 +76,15 @@ protected function statement()
8476
8577 $ tokens = [];
8678
87- while ("" !== $ token = $ this ->token ()) {
79+ $ token = $ this ->token ();
80+ while ($ token !== "" ) {
8881 /**
8982 * DEV NOTE: This checks for DELIMITER statement, that changes delimiter from ; to something else.
9083 * If detected, it will extract the new DELIMITER and reassign $this->delimiter_pattern.
9184 */
9285 if (is_string ($ token ) && preg_match ('/^delimiter$/i ' , $ token ) === 1 ) {
86+ // Omit DELIMITER command - it isn't part of SQL statement syntax
87+
9388 $ this ->consume ('[ ]* ' );
9489
9590 $ delimiter = trim ($ this ->consume ('.*?[\r\n]+ ' ));
@@ -100,61 +95,71 @@ protected function statement()
10095
10196 $ this ->delimiter_pattern = preg_quote ($ delimiter );
10297
103- continue ; // omits DELIMITER command - it isn't part of SQL statement syntax
98+ } else {
99+ $ tokens [] = $ token ;
104100 }
105-
106- $ tokens [] = $ token ;
101+ $ token = $ this ->token ();
107102 }
108103
109104 return $ tokens ;
110105 }
111106
112107 /**
113- * @return array|string
108+ * TODO: Refactor this - cyclomatic complexity > 10
114109 */
115- protected function token ()
110+ protected function token (): array | string
116111 {
117112 if ($ this ->consume ($ this ->delimiter_pattern )) {
118113 return "" ; // end of statement
119114 }
120115
121- if ("" !== $ token = $ this ->consume ('\w+ ' )) {
116+ $ token = $ this ->consume ('\w+ ' );
117+ if ($ token !== "" ) {
122118 return $ token ;
123119 }
124120
125- if ($ token = $ this ->consume ('\s+ ' )) {
121+ $ token = $ this ->consume ('\s+ ' );
122+ if ($ token ) {
126123 return $ token ;
127124 }
128125
129- if ($ token = $ this ->comment ()) {
126+ $ token = $ this ->comment ();
127+ if ($ token ) {
130128 return $ token ;
131129 }
132130
133- if ($ token = $ this ->consume ('\@\w+ ' )) {
131+ $ token = $ this ->consume ('\@\w+ ' );
132+ if ($ token ) {
134133 return $ token ; // @var
135134 }
136135
137- if ($ token = $ this ->consume (':\w+ ' )) {
138- return $ token ; // :var (PDO placeholder)
136+ $ token = $ this ->consume (':\w+ ' );
137+ if ($ token ) {
138+ return $ token ; // PDO placeholder
139139 }
140140
141- if ($ token = $ this ->consume ('[+\-\*\/.,!=^|&<>:@%~#]+ ' )) {
141+ $ token = $ this ->consume ('[+\-\*\/.,!=^|&<>:@%~#]+ ' );
142+ if ($ token ) {
142143 return $ token ; // various operators
143144 }
144145
145- if ($ token = $ this ->consume ('; ' )) {
146+ $ token = $ this ->consume ('; ' );
147+ if ($ token ) {
146148 return $ token ; // statement separator (when $delimiter_pattern has been modified)
147149 }
148150
149- if ($ token = $ this ->quoted ()) {
151+ $ token = $ this ->quoted ();
152+ if ($ token ) {
150153 return $ token ;
151154 }
152155
153- if ($ tokens = $ this ->grouped ()) {
156+ $ tokens = $ this ->grouped ();
157+ if ($ tokens ) {
154158 return $ tokens ;
155159 }
156160
157- if ($ token = $ this ->dollarquoted ()) {
161+ $ token = $ this ->dollarquoted ();
162+ if ($ token ) {
158163 return $ token ;
159164 }
160165
@@ -165,18 +170,17 @@ protected function token()
165170 $ this ->fail ("expected SQL token " );
166171 }
167172
168- /**
169- * @return string|null
170- */
171- protected function comment ()
173+ protected function comment (): ?string
172174 {
173- if ($ start = $ this ->consume ('-- ' )) {
175+ $ start = $ this ->consume ('-- ' );
176+ if ($ start ) {
174177 $ comment = $ this ->consume ("[^ \r\n]* " );
175178
176179 return "{$ start }{$ comment }" ;
177180 }
178181
179- if ($ start = $ this ->consume ('\/\* ' )) {
182+ $ start = $ this ->consume ('\/\* ' );
183+ if ($ start ) {
180184 $ comment = $ this ->consume ('.*?\*\/ ' );
181185
182186 if ($ comment ) {
@@ -189,12 +193,10 @@ protected function comment()
189193 return null ;
190194 }
191195
192- /**
193- * @return string|null
194- */
195- protected function dollarquoted ()
196+ protected function dollarquoted (): ?string
196197 {
197- if ($ delimiter = $ this ->consume ('\$\w*\$ ' )) {
198+ $ delimiter = $ this ->consume ('\$\w*\$ ' );
199+ if ($ delimiter ) {
198200 $ end_delimiter = preg_quote ($ delimiter );
199201
200202 $ body = $ this ->consume (".*? {$ end_delimiter }" );
@@ -211,18 +213,16 @@ protected function dollarquoted()
211213 return null ;
212214 }
213215
214- /**
215- * @return array|null
216- */
217- protected function grouped ()
216+ protected function grouped (): ?array
218217 {
219218 static $ end = [
220219 "( " => ") " ,
221220 "{ " => "} " ,
222221 "[ " => "] " ,
223222 ];
224223
225- if ($ opening = $ this ->consume ('[({\[] ' )) {
224+ $ opening = $ this ->consume ('[({\[] ' );
225+ if ($ opening ) {
226226 $ closing = $ end [$ opening ];
227227
228228 $ tokens = [$ opening ];
@@ -236,7 +236,8 @@ protected function grouped()
236236 return $ tokens ;
237237 }
238238
239- if ("" !== $ token = $ this ->token ()) {
239+ $ token = $ this ->token ();
240+ if ($ token !== "" ) {
240241 $ tokens [] = $ token ;
241242 } else {
242243 $ this ->fail ("expected token or group end: {$ closing }" );
@@ -247,12 +248,11 @@ protected function grouped()
247248 return null ;
248249 }
249250
250- /**
251- * @return string|null
252- */
253- protected function quoted ()
251+ protected function quoted (): ?string
254252 {
255- if ($ quote = $ this ->consume ('[` \'"] ' )) {
253+ $ quote = $ this ->consume ('[` \'"] ' );
254+
255+ if ($ quote ) {
256256 $ tokens = [$ quote ];
257257
258258 $ not_quote = '[^ ' . preg_quote ($ quote ) . '\\\\]* ' ;
@@ -274,7 +274,9 @@ protected function quoted()
274274 return implode ('' , $ tokens );
275275 }
276276
277- if ("" !== $ token = $ this ->consume ($ not_quote )) {
277+ $ token = $ this ->consume ($ not_quote );
278+
279+ if ($ token !== "" ) {
278280 $ tokens [] = $ token ;
279281
280282 continue ;
@@ -299,7 +301,7 @@ protected function is(string $exact): bool
299301
/**
 * Test whether a pattern matches the input at the current offset.
 *
 * @param string $pattern regular expression body, without delimiters or modifiers
 *
 * @return bool true only when preg_match() returns exactly 1; both "no match" (0)
 *              and a regex failure (false) yield false
 */
protected function matches(string $pattern): bool
{
    // Modifier "s" lets "." match newlines; "A" anchors the match at the start offset.
    $regex = "/{$pattern}/sA";

    $hits = preg_match(pattern: $regex, subject: $this->input, offset: $this->offset);

    return $hits === 1;
}
304306
305307 protected function consume (string $ pattern ): string
@@ -313,8 +315,9 @@ protected function consume(string $pattern): string
313315 return '' ;
314316 }
315317
/**
 * Abort by throwing an exception that reports the current offset and the input found there.
 *
 * @param string $why description of what was expected at this position
 *
 * @throws RuntimeException always
 */
protected function fail(string $why): void
{
    // Length-1 slice of the input at the current offset, quoted in the message below.
    $got = substr($this->input, $this->offset, 1);

    throw new RuntimeException("unexpected input: {$why}, at: {$this->offset}, got: \"" . $got . "\"");
}
320323}
0 commit comments