Skip to content

Commit 966d53b

Browse files
committed
Implement whitespace control
1 parent 3ead494 commit 966d53b

5 files changed

Lines changed: 344 additions & 3 deletions

File tree

src/Ast/ContentStatement.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
class ContentStatement extends Statement
66
{
7+
public bool $rightStripped = false;
8+
public bool $leftStripped = false;
9+
710
public function __construct(
811
public string $value,
912
public string $original,

src/ParserAbstract.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ abstract class ParserAbstract
3636
private const SYMBOL_NONE = -1;
3737

3838
protected Lexer $lexer;
39+
protected WhitespaceControl $whitespaceControl;
3940

4041
/*
4142
* The following members will be filled with generated parsing data:
@@ -132,9 +133,10 @@ abstract protected function initReduceCallbacks(): void;
132133
/**
133134
* Creates a parser instance.
134135
*/
135-
public function __construct(Lexer $lexer)
136+
public function __construct(Lexer $lexer, WhitespaceControl $whitespaceControl)
136137
{
137138
$this->lexer = $lexer;
139+
$this->whitespaceControl = $whitespaceControl;
138140
$this->initReduceCallbacks();
139141
$this->tokenMap = $this->createTokenMap();
140142
}
@@ -147,6 +149,7 @@ public function parse(string $code): Program
147149
$tokens = $this->lexer->tokenize($code);
148150
$this->tokens = $this->postprocessTokens($tokens);
149151
$result = $this->doParse();
152+
$result = $this->whitespaceControl->accept($result);
150153

151154
// Clear out some of the interior state, so we don't hold onto unnecessary
152155
// memory between uses of the parser

src/ParserFactory.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
class ParserFactory
66
{
7-
public function create(): Parser
7+
public function create(bool $ignoreStandalone = false): Parser
88
{
9-
return new Parser(new Lexer());
9+
return new Parser(new Lexer(), new WhitespaceControl($ignoreStandalone));
1010
}
1111
}

src/StripInfo.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
namespace DevTheorem\HandlebarsParser;
4+
5+
/**
 * Immutable set of whitespace-stripping flags reported for a single statement.
 *
 * WhitespaceControl builds one of these per visited node and then combines the
 * flags with the surrounding content to decide what whitespace to remove.
 */
readonly class StripInfo
{
    /**
     * @param bool $open             Strip all whitespace to the left of the statement.
     * @param bool $close            Strip all whitespace to the right of the statement.
     * @param bool $openStandalone   Candidate flag: the block's opening tag may sit alone on its line.
     * @param bool $closeStandalone  Candidate flag: the block's closing tag may sit alone on its line.
     * @param bool $inlineStandalone Candidate flag: the statement itself (comment or partial) may sit alone on its line.
     */
    public function __construct(
        public bool $open,
        public bool $close,
        public bool $openStandalone = false,
        public bool $closeStandalone = false,
        public bool $inlineStandalone = false,
    ) {}
}

src/WhitespaceControl.php

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
<?php
2+
3+
namespace DevTheorem\HandlebarsParser;
4+
5+
use DevTheorem\HandlebarsParser\Ast\BlockStatement;
6+
use DevTheorem\HandlebarsParser\Ast\CommentStatement;
7+
use DevTheorem\HandlebarsParser\Ast\ContentStatement;
8+
use DevTheorem\HandlebarsParser\Ast\MustacheStatement;
9+
use DevTheorem\HandlebarsParser\Ast\PartialBlockStatement;
10+
use DevTheorem\HandlebarsParser\Ast\PartialStatement;
11+
use DevTheorem\HandlebarsParser\Ast\Program;
12+
use DevTheorem\HandlebarsParser\Ast\Statement;
13+
14+
/**
15+
* Post-parse AST visitor that handles whitespace stripping.
16+
*
17+
* Implements the same logic as the reference JS parser:
18+
* https://github.com/handlebars-lang/handlebars-parser/blob/master/lib/whitespace-control.js
19+
*/
20+
class WhitespaceControl
21+
{
22+
/**
 * Whether the root program of the current accept() pass has already been visited.
 * visitProgram() treats only the first program it sees while this is false as the root.
 */
private bool $isRootSeen = false;

/**
 * @param bool $ignoreStandalone When true, standalone-line handling is skipped;
 *                               explicit strip flags on statements are still applied.
 */
public function __construct(
    private readonly bool $ignoreStandalone = false,
) {}

/**
 * Applies whitespace control to a parsed program and returns that same program.
 *
 * The root-tracking flag is cleared on every call so that one instance can be
 * reused for several templates; without the reset, every program after the
 * first would be processed as if it were nested inside a block.
 */
public function accept(Program $program): Program
{
    $this->isRootSeen = false;

    return $this->visitProgram($program);
}
32+
33+
/**
 * Routes a statement to its visitor and reports its strip flags.
 *
 * Blocks and partial blocks are visited recursively. Mustaches only carry their
 * open/close flags, while comments and partials are additionally flagged as
 * inline-standalone candidates. Any other statement yields null.
 */
private function visitNode(Statement $node): ?StripInfo
{
    return match (true) {
        $node instanceof BlockStatement,
        $node instanceof PartialBlockStatement => $this->visitBlock($node),

        $node instanceof MustacheStatement => new StripInfo(
            open: $node->strip->open,
            close: $node->strip->close,
        ),

        $node instanceof CommentStatement,
        $node instanceof PartialStatement => new StripInfo(
            open: $node->strip->open,
            close: $node->strip->close,
            inlineStandalone: true,
        ),

        default => null,
    };
}
57+
58+
/**
 * Applies whitespace control to every statement of a program body and returns
 * the same program instance (content statements are modified in place).
 *
 * The first program visited while $isRootSeen is false is treated as the root,
 * which lets the start and end of the template count as line boundaries.
 *
 * @throws \Exception If a standalone partial is not preceded by a content statement,
 *                    or a standalone block has neither a program nor an inverse.
 */
private function visitProgram(Program $program): Program
{
    $doStandalone = !$this->ignoreStandalone;

    $isRoot = !$this->isRootSeen;
    $this->isRootSeen = true;

    $body = $program->body;

    for ($i = 0, $l = count($body); $i < $l; $i++) {
        $current = $body[$i];
        $strip = $this->visitNode($current);

        if ($strip === null) {
            continue;
        }

        $prevWS = $this->isPrevWhitespace($body, $i, $isRoot);
        $nextWS = $this->isNextWhitespace($body, $i, $isRoot);

        // The node only supplies candidacy; the neighbouring content decides.
        $openStandalone = $strip->openStandalone && $prevWS;
        $closeStandalone = $strip->closeStandalone && $nextWS;
        $inlineStandalone = $strip->inlineStandalone && $prevWS && $nextWS;

        if ($strip->close) {
            $this->omitRight($body, $i, true);
        }
        if ($strip->open) {
            $this->omitLeft($body, $i, true);
        }

        if ($doStandalone && $inlineStandalone) {
            $this->omitRight($body, $i);

            if ($this->omitLeft($body, $i)) {
                // If we are on a standalone node, save the indent info for partials
                if ($current instanceof PartialStatement) {
                    $previous = $body[$i - 1];
                    if (!$previous instanceof ContentStatement) {
                        throw new \Exception('Previous unexpectedly not a ContentStatement');
                    }

                    // Pull out the whitespace from the final line. The D modifier keeps
                    // `$` from also matching before a trailing newline, so only blanks
                    // at the very end of the content count as indentation.
                    preg_match('/([ \t]+$)/D', $previous->original, $m);
                    $current->indent = $m[1] ?? '';
                }
            }
        }
        if ($doStandalone && $openStandalone) {
            /** @var BlockStatement|PartialBlockStatement $current */
            $innerBody = ($current->program ?? $current->inverse ?? throw new \Exception('Missing program'))->body;
            $this->omitRight($innerBody);

            // Strip out the previous content node if it's whitespace only
            $this->omitLeft($body, $i);
        }
        if ($doStandalone && $closeStandalone) {
            // Always strip the next node
            $this->omitRight($body, $i);

            /** @var BlockStatement|PartialBlockStatement $current */
            $innerBody = ($current->inverse ?? $current->program ?? throw new \Exception('Missing program'))->body;
            $this->omitLeft($innerBody);
        }
    }

    return $program;
}
126+
127+
/**
 * Visits a block's nested programs, applies the strip flags found on its
 * open / inverse / close tags, and reports the block's own strip info.
 *
 * The returned openStandalone / closeStandalone values are only candidates:
 * the caller combines them with the content surrounding the block.
 */
private function visitBlock(BlockStatement|PartialBlockStatement $block): StripInfo
{
    if ($block->program) {
        $this->visitProgram($block->program);
    }
    if ($block instanceof BlockStatement && $block->inverse) {
        $this->visitProgram($block->inverse);
    }

    // Find the inverse program that is involved with whitespace stripping.
    $program = $block instanceof BlockStatement
        ? ($block->program ?? $block->inverse)
        : $block->program;

    $inverse = ($block instanceof BlockStatement && $block->program && $block->inverse)
        ? $block->inverse
        : null;
    $firstInverse = $inverse;
    $lastInverse = $inverse;

    if ($inverse !== null && $inverse->chained && ($inverse->body[0] ?? null) instanceof BlockStatement) {
        $firstInverse = $inverse->body[0]->program;

        // Walk the inverse chain to find the last inverse that is actually in the chain.
        while ($lastInverse?->chained) {
            $chainBody = $lastInverse->body;
            $tail = $chainBody === [] ? null : $chainBody[array_key_last($chainBody)];

            if (!$tail instanceof BlockStatement) {
                // A chained program that does not end in a block cannot be followed
                // any further; stop here instead of looping forever.
                break;
            }

            $lastInverse = $tail->program;
        }
    }

    $strip = new StripInfo(
        open: $block->openStrip->open,
        close: $block->closeStrip->close ?? false,

        // Determine the standalone candidacy. Basically flag our content as being
        // possibly standalone so our parent can determine if we actually are standalone.
        openStandalone: $program !== null && $this->isNextWhitespace($program->body),
        closeStandalone: $this->isPrevWhitespace(
            ($firstInverse ?? $program)->body ?? [],
        ),
    );

    if ($block->openStrip->close && $program !== null) {
        $this->omitRight($program->body, null, true);
    }

    if ($inverse !== null) {
        /** @var BlockStatement $block */
        $inverseStrip = $block->inverseStrip;

        if ($inverseStrip?->open && $program !== null) {
            $this->omitLeft($program->body, null, true);
        }

        if ($inverseStrip?->close && $firstInverse !== null) {
            $this->omitRight($firstInverse->body, null, true);
        }
        if ($block->closeStrip?->open && $lastInverse !== null) {
            $this->omitLeft($lastInverse->body, null, true);
        }

        // Find standalone else statements
        if (
            !$this->ignoreStandalone
            && $program !== null
            && $firstInverse !== null
            && $this->isPrevWhitespace($program->body)
            && $this->isNextWhitespace($firstInverse->body)
        ) {
            $this->omitLeft($program->body);
            $this->omitRight($firstInverse->body);
        }
    } elseif ($block->closeStrip?->open && $program !== null) {
        $this->omitLeft($program->body, null, true);
    }

    return $strip;
}
207+
208+
/**
 * Tells whether everything between the previous line break and position $i is whitespace.
 *
 * With $i omitted, the end of the body is used as the position. When nothing
 * precedes the position the answer is $isRoot. Only a content statement can
 * qualify; for the first node of the root program the start of the content
 * counts as a line break as well.
 *
 * @param Statement[] $body
 */
private function isPrevWhitespace(array $body, ?int $i = null, bool $isRoot = false): bool
{
    $i ??= count($body);

    // Nodes that end with newlines are considered whitespace (but are special-cased for strip operations)
    $before = $body[$i - 1] ?? null;

    if ($before === null) {
        return $isRoot;
    }

    if (!$before instanceof ContentStatement) {
        return false;
    }

    // Only the very first node of the root may match from the start of its text.
    $isFirstRootNode = $isRoot && !isset($body[$i - 2]);
    $regex = $isFirstRootNode ? '/(^|\r?\n)\s*?$/' : '/\r?\n\s*?$/';

    return preg_match($regex, $before->original) === 1;
}
234+
235+
/**
 * Tells whether everything between position $i and the next line break is whitespace.
 *
 * With $i omitted, the start of the body is used as the position. When nothing
 * follows the position the answer is $isRoot. Only a content statement can
 * qualify; for the last node of the root program the end of the content counts
 * as a line break as well.
 *
 * @param Statement[] $body
 */
private function isNextWhitespace(array $body, ?int $i = null, bool $isRoot = false): bool
{
    $i ??= -1;

    $after = $body[$i + 1] ?? null;

    if ($after === null) {
        return $isRoot;
    }

    if (!$after instanceof ContentStatement) {
        return false;
    }

    // Only the very last node of the root may match up to the end of its text.
    $isLastRootNode = $isRoot && !isset($body[$i + 2]);
    $regex = $isLastRootNode ? '/^\s*?(\r?\n|$)/' : '/^\s*?\r?\n/';

    return preg_match($regex, $after->original) === 1;
}
260+
261+
/**
 * Strips leading whitespace from the content statement to the right of position $i.
 * E.g. for {{foo}}' ' the ' ' node is the one that gets trimmed.
 *
 * With $i omitted, the first child of the body is the target.
 *
 * With $multiple set, all leading whitespace is removed; otherwise only blanks
 * up to and including the first line break are removed, and a node that was
 * already right-stripped is left untouched.
 *
 * @param Statement[] $body
 */
private function omitRight(array $body, ?int $i = null, bool $multiple = false): void
{
    $index = $i === null ? 0 : $i + 1;
    $target = $body[$index] ?? null;

    if (!$target instanceof ContentStatement) {
        return;
    }
    if (!$multiple && $target->rightStripped) {
        return;
    }

    $before = $target->value;
    $regex = $multiple ? '/^\s+/' : '/^[ \t]*\r?\n?/';

    $target->value = preg_replace($regex, '', $before) ?? '';
    $target->rightStripped = $target->value !== $before;
}
289+
290+
/**
 * Strips trailing whitespace from the content statement to the left of position $i.
 * E.g. for ' '{{foo}} the ' ' node is the one that gets trimmed.
 *
 * With $i omitted, the last child of the body is the target.
 *
 * With $multiple set, all trailing whitespace is removed; otherwise only the
 * spaces and tabs at the very end of the content are removed, and a node that
 * was already left-stripped is left untouched.
 *
 * @param Statement[] $body
 *
 * @return bool True when this call changed the target's value.
 */
private function omitLeft(array $body, ?int $i = null, bool $multiple = false): bool
{
    $current = $body[$i === null ? count($body) - 1 : $i - 1] ?? null;

    if (
        !$current instanceof ContentStatement
        || (!$multiple && $current->leftStripped)
    ) {
        return false;
    }

    // We omit the last node if it's whitespace only and not preceded by a non-content node.
    // The D modifier pins `$` to the true end of the string: without it PCRE also
    // matches just before a trailing newline, so "foo\n  \n" would lose the blanks
    // of its last line even though the content ends with a line break.
    $original = $current->value;
    $current->value = ($multiple
        ? preg_replace('/\s+$/D', '', $current->value)
        : preg_replace('/[ \t]+$/D', '', $current->value)) ?? '';
    $current->leftStripped = ($current->value !== $original);

    return $current->leftStripped;
}
321+
}

0 commit comments

Comments
 (0)