Skip to content

Commit 7aaa80f

Browse files
authored
Feature/faster parsing (#26)
Faster parsing, accomplished by splitting the source string into an array of characters with preg_split and then working on that array
1 parent 3541747 commit 7aaa80f

4 files changed

Lines changed: 4847 additions & 19 deletions

File tree

src/HJSON/HJSONParser.php

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
class HJSONParser
66
{
77

8-
private $text;
8+
private $textArray;
9+
private $textLengthChars;
910
private $at; // The index of the current character
1011
private $ch; // The current character
1112
private $escapee = [];
@@ -29,7 +30,9 @@ public function __construct()
2930
public function parse($source, $options = [])
3031
{
3132
$this->keepWsc = $options && isset($options['keepWsc']) && $options['keepWsc'];
32-
$this->text = $source;
33+
$this->textArray = preg_split("//u", $source, null, PREG_SPLIT_NO_EMPTY);
34+
$this->textLengthChars = count($this->textArray);
35+
3336
$data = $this->rootValue();
3437

3538
if ($options && isset($options['assoc']) && $options['assoc']) {
@@ -312,25 +315,33 @@ private function error($m)
312315
$colBytes = 0;
313316
$line=1;
314317

318+
// Start with where we're at now, count back to most recent line break
319+
// - to determine "column" of error hit
315320
$i = $this->at;
316321
while ($i > 0) {
317-
$ch = mb_substr(mb_strcut($this->text, $i - 1), 0, 1);
318-
$i -= strlen($ch);
322+
// Mimic old behavior with mb_substr
323+
if ($i >= $this->textLengthChars) {
324+
$ch = "";
325+
} else {
326+
$ch = $this->textArray[$i];
327+
}
319328

329+
--$i;
320330
if ($ch === "\n") {
321331
break;
322332
}
323333

324334
$col++;
325-
$colBytes += strlen($ch);
326335
}
327336

337+
// Count back line endings from there to determine line# of error hit
328338
for (; $i > 0; $i--) {
329-
if ($this->text[$i] === "\n") {
339+
if ($this->textArray[$i] === "\n") {
330340
$line++;
331341
}
332342
}
333-
throw new HJSONException("$m at line $line, $col >>>". mb_substr(mb_strcut($this->text, $this->at - $colBytes), 0, 20) ." ...");
343+
344+
throw new HJSONException("$m at line $line, $col >>>". implode(array_slice($this->textArray, $this->at - $col, 20)) ." ...");
334345
}
335346

336347
private function next($c = false)
@@ -343,19 +354,29 @@ private function next($c = false)
343354

344355
// Get the next character. When there are no more characters,
345356
// return the empty string.
346-
$this->ch = (strlen($this->text) > $this->at) ? mb_substr(mb_strcut($this->text, $this->at), 0, 1) : null;
347-
$this->at += strlen($this->ch);
357+
$this->ch = ($this->textLengthChars > $this->at) ? $this->textArray[$this->at] : null;
358+
++$this->at;
348359
return $this->ch;
349360
}
350361

362+
/**
363+
* Peek at character at given offset from current "at"
364+
* - >=0 - ahead of "at"
365+
* - <0 = before "at"
366+
*/
351367
private function peek($offs)
352368
{
353-
// range check is not required
354-
if ($offs >= 0) {
355-
return mb_substr(mb_strcut($this->text, $this->at), $offs, 1);
356-
} else {
357-
return mb_substr(mb_strcut($this->text, 0, $this->at), $offs, 1);
369+
$index = $this->at + $offs;
370+
371+
// Mimic old behavior with mb_substr
372+
if ($index < 0) {
373+
$index = 0;
358374
}
375+
if ($index >= $this->textLengthChars) {
376+
return "";
377+
}
378+
379+
return $this->textArray[$index];
359380
}
360381

361382
private function skipIndent($indent)
@@ -525,19 +546,20 @@ private function getComment($wat)
525546
$i;
526547
$wat--;
527548
// remove trailing whitespace
528-
for ($i = $this->at - 2; $i > $wat && $this->text[$i] <= ' ' && $this->text[$i] !== "\n"; $i--) {
549+
for ($i = $this->at - 2; $i > $wat && $this->textArray[$i] <= ' ' && $this->textArray[$i] !== "\n"; $i--) {
529550
}
530551

531552
// but only up to EOL
532-
if ($this->text[$i] === "\n") {
553+
if ($i > 0 && $this->textArray[$i] === "\n") {
533554
$i--;
534555
}
535-
if ($this->text[$i] === "\r") {
556+
if ($i > 0 && $this->textArray[$i] === "\r") {
536557
$i--;
537558
}
538559

539-
$res = mb_substr($this->text, $wat, $i-$wat+1);
540-
for ($i = 0; $i < mb_strlen($res); $i++) {
560+
$res = array_slice($this->textArray, $wat, $i-$wat+1);
561+
$res_len = count($res);
562+
for ($i = 0; $i < $res_len; $i++) {
541563
if ($res[$i] > ' ') {
542564
return $res;
543565
}

0 commit comments

Comments
 (0)