|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace DevTheorem\HandlebarsParser; |
| 4 | + |
| 5 | +use DevTheorem\HandlebarsParser\Ast\BlockStatement; |
| 6 | +use DevTheorem\HandlebarsParser\Ast\CommentStatement; |
| 7 | +use DevTheorem\HandlebarsParser\Ast\ContentStatement; |
| 8 | +use DevTheorem\HandlebarsParser\Ast\MustacheStatement; |
| 9 | +use DevTheorem\HandlebarsParser\Ast\PartialBlockStatement; |
| 10 | +use DevTheorem\HandlebarsParser\Ast\PartialStatement; |
| 11 | +use DevTheorem\HandlebarsParser\Ast\Program; |
| 12 | +use DevTheorem\HandlebarsParser\Ast\Statement; |
| 13 | + |
/**
 * Post-parse AST visitor that handles whitespace stripping.
 *
 * Implements the same logic as the reference JS parser:
 * https://github.com/handlebars-lang/handlebars-parser/blob/master/lib/whitespace-control.js
 *
 * The visitor works in place: it rewrites the `value` of ContentStatement nodes,
 * flags them via `leftStripped` / `rightStripped`, and records the `indent` of
 * standalone PartialStatement nodes.
 *
 * Note on regular expressions: PCRE's `$` also matches just before a trailing
 * newline unless the `D` (dollar-end-only) modifier is given, whereas the JS
 * reference patterns only match at the very end of the string. Every
 * `$`-anchored pattern below therefore carries `D`.
 */
class WhitespaceControl
{
    /** Whether a root program has already been entered during the current accept() call. */
    private bool $isRootSeen = false;

    /**
     * @param bool $ignoreStandalone When true, standalone-line detection is skipped and
     *                               only explicit strip flags are honoured.
     */
    public function __construct(
        private readonly bool $ignoreStandalone = false,
    ) {}

    /**
     * Applies whitespace control to the given program and returns that same program.
     *
     * The program passed here is treated as the root; programs reached through
     * nested blocks are treated as non-root.
     */
    public function accept(Program $program): Program
    {
        // Reset the root marker so that one instance can process several templates.
        $this->isRootSeen = false;

        return $this->visitProgram($program);
    }

    /**
     * Dispatch to the appropriate visitor method for a statement node.
     *
     * @return StripInfo|null Strip information for the node, or null when the node
     *                        type takes no part in whitespace control.
     */
    private function visitNode(Statement $node): ?StripInfo
    {
        if ($node instanceof BlockStatement || $node instanceof PartialBlockStatement) {
            return $this->visitBlock($node);
        }
        if ($node instanceof MustacheStatement) {
            return new StripInfo(
                open: $node->strip->open,
                close: $node->strip->close,
            );
        }
        if ($node instanceof CommentStatement || $node instanceof PartialStatement) {
            // Comments and partials may be standalone on their own line.
            return new StripInfo(
                open: $node->strip->open,
                close: $node->strip->close,
                inlineStandalone: true,
            );
        }

        return null;
    }

    /**
     * Walks the statements of a program, applying explicit strip flags and
     * (unless disabled) standalone-line stripping around each statement.
     *
     * @throws \Exception When the AST is in an unexpected shape (a standalone partial
     *                    not preceded by content, or a standalone block without a program).
     */
    private function visitProgram(Program $program): Program
    {
        $doStandalone = !$this->ignoreStandalone;

        // Only the first program visited is the root.
        $isRoot = !$this->isRootSeen;
        $this->isRootSeen = true;

        $body = $program->body;

        for ($i = 0, $l = count($body); $i < $l; $i++) {
            $current = $body[$i];
            $strip = $this->visitNode($current);

            if ($strip === null) {
                continue;
            }

            $prevWS = $this->isPrevWhitespace($body, $i, $isRoot);
            $nextWS = $this->isNextWhitespace($body, $i, $isRoot);

            $openStandalone = $strip->openStandalone && $prevWS;
            $closeStandalone = $strip->closeStandalone && $nextWS;
            $inlineStandalone = $strip->inlineStandalone && $prevWS && $nextWS;

            // Explicit strip flags remove all adjacent whitespace.
            if ($strip->close) {
                $this->omitRight($body, $i, true);
            }
            if ($strip->open) {
                $this->omitLeft($body, $i, true);
            }

            if ($doStandalone && $inlineStandalone) {
                $this->omitRight($body, $i);

                if ($this->omitLeft($body, $i)) {
                    // If we are on a standalone node, save the indent info for partials
                    if ($current instanceof PartialStatement) {
                        $previous = $body[$i - 1];
                        if (!$previous instanceof ContentStatement) {
                            throw new \Exception('Previous unexpectedly not a ContentStatement');
                        }

                        // Pull out the whitespace from the final line. `D` keeps `$` from
                        // matching before a trailing newline.
                        preg_match('/([ \t]+$)/D', $previous->original, $m);
                        $current->indent = $m[1] ?? '';
                    }
                }
            }
            if ($doStandalone && $openStandalone) {
                /** @var BlockStatement|PartialBlockStatement $current */
                $innerBody = ($current->program ?? $current->inverse ?? throw new \Exception('Missing program'))->body;
                $this->omitRight($innerBody);

                // Strip out the previous content node if it's whitespace only
                $this->omitLeft($body, $i);
            }
            if ($doStandalone && $closeStandalone) {
                // Always strip the next node
                $this->omitRight($body, $i);

                /** @var BlockStatement|PartialBlockStatement $current */
                $innerBody = ($current->inverse ?? $current->program ?? throw new \Exception('Missing program'))->body;
                $this->omitLeft($innerBody);
            }
        }

        return $program;
    }

    /**
     * Visits the programs of a block, applies the strip flags of its open / inverse /
     * close tags to the inner content, and reports the block's own strip information
     * so the parent program can decide whether the block tags are standalone.
     */
    private function visitBlock(BlockStatement|PartialBlockStatement $block): StripInfo
    {
        if ($block->program) {
            $this->visitProgram($block->program);
        }
        if ($block instanceof BlockStatement && $block->inverse) {
            $this->visitProgram($block->inverse);
        }

        // Find the inverse program that is involved with whitespace stripping.
        $program = $block instanceof BlockStatement
            ? ($block->program ?? $block->inverse)
            : $block->program;

        $inverse = ($block instanceof BlockStatement && $block->program && $block->inverse)
            ? $block->inverse
            : null;
        $firstInverse = $inverse;
        $lastInverse = $inverse;

        $firstChained = $inverse?->chained ? ($inverse->body[0] ?? null) : null;

        if ($firstChained instanceof BlockStatement) {
            $firstInverse = $firstChained->program;

            // Walk the inverse chain to find the last inverse that is actually in the chain.
            while ($lastInverse?->chained) {
                $children = $lastInverse->body;
                $lastChild = $children === [] ? null : $children[array_key_last($children)];

                if (!$lastChild instanceof BlockStatement) {
                    // Unexpected chain shape: stop here instead of looping forever.
                    break;
                }

                $lastInverse = $lastChild->program;
            }
        }

        $strip = new StripInfo(
            open: $block->openStrip->open,
            close: $block->closeStrip->close ?? false,

            // Determine the standalone candidacy. Basically flag our content as being
            // possibly standalone so our parent can determine if we actually are standalone.
            openStandalone: $program !== null && $this->isNextWhitespace($program->body),
            closeStandalone: $this->isPrevWhitespace(
                ($firstInverse ?? $program)->body ?? [],
            ),
        );

        if ($block->openStrip->close && $program !== null) {
            $this->omitRight($program->body, null, true);
        }

        if ($inverse !== null) {
            /** @var BlockStatement $block */
            $inverseStrip = $block->inverseStrip;

            if ($inverseStrip?->open && $program !== null) {
                $this->omitLeft($program->body, null, true);
            }

            if ($inverseStrip?->close && $firstInverse !== null) {
                $this->omitRight($firstInverse->body, null, true);
            }
            if ($block->closeStrip?->open && $lastInverse !== null) {
                $this->omitLeft($lastInverse->body, null, true);
            }

            // Find standalone else statements
            if (
                !$this->ignoreStandalone
                && $program !== null
                && $firstInverse !== null
                && $this->isPrevWhitespace($program->body)
                && $this->isNextWhitespace($firstInverse->body)
            ) {
                $this->omitLeft($program->body);
                $this->omitRight($firstInverse->body);
            }
        } elseif ($block->closeStrip?->open && $program !== null) {
            $this->omitLeft($program->body, null, true);
        }

        return $strip;
    }

    /**
     * Check if the node to the left of position i is whitespace-only on the current line.
     *
     * When $i is null the last node of $body is inspected. A missing node counts as
     * whitespace only at the root level; a non-content node never does.
     *
     * @param Statement[] $body
     */
    private function isPrevWhitespace(array $body, ?int $i = null, bool $isRoot = false): bool
    {
        $i ??= count($body);

        // Nodes that end with newlines are considered whitespace (but are special-cased for strip operations)
        $prev = $body[$i - 1] ?? null;
        $sibling = $body[$i - 2] ?? null;

        if ($prev === null) {
            return $isRoot;
        }

        if (!$prev instanceof ContentStatement) {
            return false;
        }

        // At the very start of the root program the content itself may be the whole line.
        $pattern = ($sibling !== null || !$isRoot) ? '/\r?\n\s*?$/D' : '/(^|\r?\n)\s*?$/D';

        return (bool) preg_match($pattern, $prev->original);
    }

    /**
     * Check if the node to the right of position i is whitespace-only on the current line.
     *
     * When $i is null the first node of $body is inspected. A missing node counts as
     * whitespace only at the root level; a non-content node never does.
     *
     * @param Statement[] $body
     */
    private function isNextWhitespace(array $body, ?int $i = null, bool $isRoot = false): bool
    {
        $i ??= -1;

        $next = $body[$i + 1] ?? null;
        $sibling = $body[$i + 2] ?? null;

        if ($next === null) {
            return $isRoot;
        }

        if (!$next instanceof ContentStatement) {
            return false;
        }

        // At the very end of the root program the content may end without a newline.
        $pattern = ($sibling !== null || !$isRoot) ? '/^\s*?\r?\n/' : '/^\s*?(\r?\n|$)/D';

        return (bool) preg_match($pattern, $next->original);
    }

    /**
     * Marks the node to the right of the position as omitted.
     * I.e. {{foo}}' ' will mark the ' ' node as omitted.
     *
     * If $i is null, then the first child will be marked as such.
     *
     * If $multiple is true then all whitespace will be stripped out until
     * non-whitespace content is met; otherwise only the remainder of the
     * current line (including its line break) is removed.
     *
     * @param Statement[] $body
     */
    private function omitRight(array $body, ?int $i = null, bool $multiple = false): void
    {
        $current = $body[$i === null ? 0 : $i + 1] ?? null;

        if (
            !$current instanceof ContentStatement
            || (!$multiple && $current->rightStripped)
        ) {
            return;
        }

        $original = $current->value;
        $pattern = $multiple ? '/^\s+/' : '/^[ \t]*\r?\n?/';

        // If PCRE fails (null result) keep the text rather than erasing it.
        $current->value = preg_replace($pattern, '', $original) ?? $original;
        $current->rightStripped = ($current->value !== $original);
    }

    /**
     * Marks the node to the left of the position as omitted.
     * I.e. ' '{{foo}} will mark the ' ' node as omitted.
     *
     * If $i is null then the last child will be marked as such.
     *
     * If $multiple is true then all whitespace will be stripped out until
     * non-whitespace content is met; otherwise only trailing spaces and tabs
     * at the very end of the content are removed.
     *
     * @param Statement[] $body
     *
     * @return bool True when the content node was changed by this call.
     */
    private function omitLeft(array $body, ?int $i = null, bool $multiple = false): bool
    {
        $current = $body[$i === null ? count($body) - 1 : $i - 1] ?? null;

        if (
            !$current instanceof ContentStatement
            || (!$multiple && $current->leftStripped)
        ) {
            return false;
        }

        // We omit the last node if it's whitespace only and not preceded by a non-content node.
        $original = $current->value;

        // `D` is essential here: without it `$` would match before a final newline and
        // indentation belonging to the previous line ("foo  \n") would be stripped.
        $pattern = $multiple ? '/\s+$/D' : '/[ \t]+$/D';

        // If PCRE fails (null result) keep the text rather than erasing it.
        $current->value = preg_replace($pattern, '', $original) ?? $original;
        $current->leftStripped = ($current->value !== $original);

        return $current->leftStripped;
    }
}
0 commit comments