55class HJSONParser
66{
77
8- private $ text ;
8+ private $ textArray ;
9+ private $ textLengthChars ;
910 private $ at ; // The index of the current character
1011 private $ ch ; // The current character
1112 private $ escapee = [];
@@ -29,7 +30,9 @@ public function __construct()
2930 public function parse ($ source , $ options = [])
3031 {
3132 $ this ->keepWsc = $ options && isset ($ options ['keepWsc ' ]) && $ options ['keepWsc ' ];
32- $ this ->text = $ source ;
33+ $ this ->textArray = preg_split ("//u " , $ source , null , PREG_SPLIT_NO_EMPTY );
34+ $ this ->textLengthChars = count ($ this ->textArray );
35+
3336 $ data = $ this ->rootValue ();
3437
3538 if ($ options && isset ($ options ['assoc ' ]) && $ options ['assoc ' ]) {
@@ -312,25 +315,33 @@ private function error($m)
312315 $ colBytes = 0 ;
313316 $ line =1 ;
314317
318+ // Start with where we're at now, count back to most recent line break
319+ // - to determine "column" of error hit
315320 $ i = $ this ->at ;
316321 while ($ i > 0 ) {
317- $ ch = mb_substr (mb_strcut ($ this ->text , $ i - 1 ), 0 , 1 );
318- $ i -= strlen ($ ch );
322+ // Mimic old behavior with mb_substr
323+ if ($ i >= $ this ->textLengthChars ) {
324+ $ ch = "" ;
325+ } else {
326+ $ ch = $ this ->textArray [$ i ];
327+ }
319328
329+ --$ i ;
320330 if ($ ch === "\n" ) {
321331 break ;
322332 }
323333
324334 $ col ++;
325- $ colBytes += strlen ($ ch );
326335 }
327336
337+ // Count back line endings from there to determine line# of error hit
328338 for (; $ i > 0 ; $ i --) {
329- if ($ this ->text [$ i ] === "\n" ) {
339+ if ($ this ->textArray [$ i ] === "\n" ) {
330340 $ line ++;
331341 }
332342 }
333- throw new HJSONException ("$ m at line $ line, $ col >>> " . mb_substr (mb_strcut ($ this ->text , $ this ->at - $ colBytes ), 0 , 20 ) ." ... " );
343+
344+ throw new HJSONException ("$ m at line $ line, $ col >>> " . implode (array_slice ($ this ->textArray , $ this ->at - $ col , 20 )) ." ... " );
334345 }
335346
336347 private function next ($ c = false )
@@ -343,19 +354,29 @@ private function next($c = false)
343354
344355 // Get the next character. When there are no more characters,
345356 // return the empty string.
346- $ this ->ch = (strlen ( $ this ->text ) > $ this ->at ) ? mb_substr ( mb_strcut ( $ this ->text , $ this ->at ), 0 , 1 ) : null ;
347- $ this -> at += strlen ( $ this ->ch ) ;
357+ $ this ->ch = ($ this ->textLengthChars > $ this ->at ) ? $ this ->textArray [ $ this ->at ] : null ;
358+ ++ $ this ->at ;
348359 return $ this ->ch ;
349360 }
350361
/**
 * Look at a single character relative to the current position without
 * moving the position.
 *
 * @param int $offs Offset added to $this->at: zero or positive looks
 *                  ahead, negative looks behind.
 *
 * @return string The character found at the resulting index, or "" when
 *                that index is at or beyond the end of the text.
 */
private function peek($offs)
{
    // An index that would fall before the start of the text is pinned to
    // the first character (kept to match the earlier mb_substr-based code).
    // NOTE(review): this means peeking before the start yields the first
    // character rather than "" - confirm callers rely on that.
    $target = max(0, $this->at + $offs);

    return ($target < $this->textLengthChars) ? $this->textArray[$target] : "";
}
360381
361382 private function skipIndent ($ indent )
@@ -525,19 +546,20 @@ private function getComment($wat)
525546 $ i ;
526547 $ wat --;
527548 // remove trailing whitespace
528- for ($ i = $ this ->at - 2 ; $ i > $ wat && $ this ->text [$ i ] <= ' ' && $ this ->text [$ i ] !== "\n" ; $ i --) {
549+ for ($ i = $ this ->at - 2 ; $ i > $ wat && $ this ->textArray [$ i ] <= ' ' && $ this ->textArray [$ i ] !== "\n" ; $ i --) {
529550 }
530551
531552 // but only up to EOL
532- if ($ this ->text [$ i ] === "\n" ) {
553+ if ($ i > 0 && $ this ->textArray [$ i ] === "\n" ) {
533554 $ i --;
534555 }
535- if ($ this ->text [$ i ] === "\r" ) {
556+ if ($ i > 0 && $ this ->textArray [$ i ] === "\r" ) {
536557 $ i --;
537558 }
538559
539- $ res = mb_substr ($ this ->text , $ wat , $ i -$ wat +1 );
540- for ($ i = 0 ; $ i < mb_strlen ($ res ); $ i ++) {
560+ $ res = array_slice ($ this ->textArray , $ wat , $ i -$ wat +1 );
561+ $ res_len = count ($ res );
562+ for ($ i = 0 ; $ i < $ res_len ; $ i ++) {
541563 if ($ res [$ i ] > ' ' ) {
542564 return $ res ;
543565 }
0 commit comments