Skip to content

Commit db3fcb9

Browse files
committed
[TASK] switch 2 dindent
1 parent 1d36525 commit db3fcb9

3 files changed

Lines changed: 19 additions & 278 deletions

File tree

Classes/Service/CleanHtmlService.php

Lines changed: 9 additions & 268 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public function clean(string $html, array $config = []): string
8383
}
8484

8585
// convert line-breaks to UNIX
86-
$this->convNlOs($html);
86+
$html = preg_replace("(\r\n|\r)", $this->newline, $html);
8787

8888
$manipulations = [];
8989

@@ -95,16 +95,17 @@ public function clean(string $html, array $config = []): string
9595
$manipulations['removeComments'] = GeneralUtility::makeInstance(RemoveComments::class);
9696
}
9797

98-
if (!empty($this->headerComment)) {
99-
$this->includeHeaderComment($html);
100-
}
101-
10298
foreach ($manipulations as $key => $manipulation) {
10399
/** @var ManipulationInterface $manipulation */
104100
$configuration = isset($config[$key . '.']) && \is_array($config[$key . '.']) ? $config[$key . '.'] : [];
105101
$html = $manipulation->manipulate($html, $configuration);
106102
}
107103

104+
// include configured header comment in HTML content block
105+
if (!empty($this->headerComment)) {
106+
$html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html, 1);
107+
}
108+
108109
// cleanup HTML5 self-closing elements
109110
if (!isset($GLOBALS['TSFE']->config['config']['doctype'])
110111
|| 'x' !== substr($GLOBALS['TSFE']->config['config']['doctype'], 0, 1)) {
@@ -115,276 +116,16 @@ public function clean(string $html, array $config = []): string
115116
);
116117
}
117118

118-
if ($this->formatType > 0) {
119-
$html = $this->formatHtml($html);
119+
if ($this->formatType) {
120+
$indenter = new \Gajus\Dindent\Indenter(['indentation_character' => $this->tab]);
121+
$html = $indenter->indent($html);
120122
}
121123

122-
// remove white space after line ending
123-
$this->rTrimLines($html);
124-
125124
// recover line-breaks
126125
if (Environment::isWindows()) {
127126
$html = str_replace($this->newline, "\r\n", $html);
128127
}
129128

130129
return (string) $html;
131130
}
132-
133-
/**
134-
* Formats the (X)HTML code:
135-
* - tabs according to the hierarchy of the tags
136-
* - removes empty spaces between tags
137-
* - removes linebreaks within tags (spares where necessary: pre, textarea, comments, ..)
138-
* choose from five options:
139-
* 0 => off
140-
* 1 => no line break at all (code in one line)
141-
* 2 => minimalistic line breaks (structure defining box-elements)
142-
* 3 => aesthetic line breaks (important box-elements)
143-
* 4 => logic line breaks (all box-elements)
144-
* 5 => max line breaks (all elements).
145-
*/
146-
protected function formatHtml(string $html): string
147-
{
148-
// Save original formatted pre, textarea, comments, styles and scripts & replace them with markers
149-
preg_match_all(
150-
'/(?s)((<!--.*?-->)|(<[ \n\r]*pre[^>]*>.*?<[ \n\r]*\/pre[^>]*>)|(<[ \n\r]*textarea[^>]*>.*?<[ \n\r]*\/textarea[^>]*>)|(<[ \n\r]*style[^>]*>.*?<[ \n\r]*\/style[^>]*>)|(<[ \n\r]*script[^>]*>.*?<[ \n\r]*\/script[^>]*>))/im',
151-
$html,
152-
$matches
153-
);
154-
$noFormat = $matches[0]; // do not format these block elements
155-
for ($i = 0; $i < \count($noFormat); ++$i) {
156-
$html = str_replace($noFormat[$i], "\n<!-- ELEMENT {$i} -->", $html);
157-
}
158-
159-
// define box elements for formatting
160-
$trueBoxElements = 'address|blockquote|center|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section';
161-
$functionalBoxElements = 'dd|dt|frameset|li|tbody|td|tfoot|th|thead|tr|colgroup';
162-
$usableBoxElements = 'applet|button|del|iframe|ins|map|object|script';
163-
$imagineBoxElements = 'html|body|head|meta|title|link|script|base|!--';
164-
$allBoxLikeElements = '(?>' . $trueBoxElements . '|' . $functionalBoxElements . '|' . $usableBoxElements . '|' . $imagineBoxElements . ')';
165-
$esteticBoxLikeElements = '(?>html|head|body|meta name|title|div|table|h1|h2|h3|h4|h5|h6|p|form|pre|center|!--)';
166-
$structureBoxLikeElements = '(?>html|head|body|div|!--)';
167-
168-
// split html into its elements
169-
$htmlArrayTemp = preg_split(
170-
'/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/',
171-
$html,
172-
-1,
173-
\PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
174-
);
175-
176-
if (false === $htmlArrayTemp) {
177-
// Restore saved comments, styles and scripts
178-
for ($i = 0; $i < \count($noFormat); ++$i) {
179-
$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
180-
}
181-
182-
return $html;
183-
}
184-
// remove empty lines
185-
$htmlArray = [''];
186-
$index = 1;
187-
for ($x = 0; $x < \count($htmlArrayTemp); ++$x) {
188-
$text = trim($htmlArrayTemp[$x]);
189-
$htmlArray[$index] = '' !== $text ? $htmlArrayTemp[$x] : $this->emptySpaceChar;
190-
++$index;
191-
}
192-
193-
// rebuild html
194-
$html = '';
195-
$tabs = 0;
196-
for ($x = 0; $x < \count($htmlArray); ++$x) {
197-
$htmlArrayBefore = $htmlArray[$x - 1] ?? '';
198-
$htmlArrayCurrent = $htmlArray[$x] ?? '';
199-
200-
// check if the element should stand in a new line
201-
$newline = false;
202-
if ('<?xml' == substr($htmlArrayBefore, 0, 5)) {
203-
$newline = true;
204-
} elseif (2 == $this->formatType && ( // minimalistic line break
205-
// this element has a line break before itself
206-
preg_match(
207-
'/<' . $structureBoxLikeElements . '(.*)>/Usi',
208-
$htmlArrayCurrent
209-
) || preg_match(
210-
'/<' . $structureBoxLikeElements . '(.*) \/>/Usi',
211-
$htmlArrayCurrent
212-
) // one element before is a element that has a line break after
213-
|| preg_match(
214-
'/<\/' . $structureBoxLikeElements . '(.*)>/Usi',
215-
$htmlArrayBefore
216-
) || '<!--' == substr(
217-
$htmlArrayBefore,
218-
0,
219-
4
220-
) || preg_match('/<' . $structureBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
221-
) {
222-
$newline = true;
223-
} elseif (3 == $this->formatType && ( // aestetic line break
224-
// this element has a line break before itself
225-
preg_match(
226-
'/<' . $esteticBoxLikeElements . '(.*)>/Usi',
227-
$htmlArrayCurrent
228-
) || preg_match(
229-
'/<' . $esteticBoxLikeElements . '(.*) \/>/Usi',
230-
$htmlArrayCurrent
231-
) // one element before is a element that has a line break after
232-
|| preg_match('/<\/' . $esteticBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
233-
$htmlArrayBefore,
234-
0,
235-
4
236-
) || preg_match('/<' . $esteticBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
237-
) {
238-
$newline = true;
239-
} elseif ($this->formatType >= 4 && ( // logical line break
240-
// this element has a line break before itself
241-
preg_match(
242-
'/<' . $allBoxLikeElements . '(.*)>/Usi',
243-
$htmlArrayCurrent
244-
) || preg_match(
245-
'/<' . $allBoxLikeElements . '(.*) \/>/Usi',
246-
$htmlArrayCurrent
247-
) // one element before is a element that has a line break after
248-
|| preg_match('/<\/' . $allBoxLikeElements . '(.*)>/Usi', $htmlArrayBefore) || '<!--' == substr(
249-
$htmlArrayBefore,
250-
0,
251-
4
252-
) || preg_match('/<' . $allBoxLikeElements . '(.*) \/>/Usi', $htmlArrayBefore))
253-
) {
254-
$newline = true;
255-
}
256-
257-
// count down a tab
258-
if ('</' == substr($htmlArrayCurrent, 0, 2)) {
259-
--$tabs;
260-
}
261-
262-
// add tabs and line breaks in front of the current tag
263-
if ($newline) {
264-
$html .= $this->newline;
265-
for ($y = 0; $y < $tabs; ++$y) {
266-
$html .= $this->tab;
267-
}
268-
}
269-
270-
// remove white spaces and line breaks and add current tag to the html-string
271-
if ('<![CDATA[' == substr($htmlArrayCurrent, 0, 9) // remove multiple white space in CDATA / XML
272-
|| '<?xml' == substr($htmlArrayCurrent, 0, 5)
273-
) {
274-
$html .= $this->killWhiteSpace($htmlArrayCurrent);
275-
} else { // remove all line breaks
276-
$html .= $this->killLineBreaks($htmlArrayCurrent);
277-
}
278-
279-
// count up a tab
280-
if ('<' == substr($htmlArrayCurrent, 0, 1) && '/' != substr($htmlArrayCurrent, 1, 1)) {
281-
if (' ' !== substr($htmlArrayCurrent, 1, 1)
282-
&& 'img' !== substr($htmlArrayCurrent, 1, 3)
283-
&& 'source' !== substr($htmlArrayCurrent, 1, 6)
284-
&& 'br' !== substr($htmlArrayCurrent, 1, 2)
285-
&& 'hr' !== substr($htmlArrayCurrent, 1, 2)
286-
&& 'input' !== substr($htmlArrayCurrent, 1, 5)
287-
&& 'link' !== substr($htmlArrayCurrent, 1, 4)
288-
&& 'meta' !== substr($htmlArrayCurrent, 1, 4)
289-
&& 'col ' !== substr($htmlArrayCurrent, 1, 4)
290-
&& 'frame' !== substr($htmlArrayCurrent, 1, 5)
291-
&& 'isindex' !== substr($htmlArrayCurrent, 1, 7)
292-
&& 'param' !== substr($htmlArrayCurrent, 1, 5)
293-
&& 'area' !== substr($htmlArrayCurrent, 1, 4)
294-
&& 'base' !== substr($htmlArrayCurrent, 1, 4)
295-
&& '<!' !== substr($htmlArrayCurrent, 0, 2)
296-
&& '<?xml' !== substr($htmlArrayCurrent, 0, 5)
297-
) {
298-
++$tabs;
299-
}
300-
}
301-
}
302-
303-
// Remove empty lines
304-
if ($this->formatType > 1) {
305-
$this->removeEmptyLines($html);
306-
}
307-
308-
// Restore saved comments, styles and scripts
309-
for ($i = 0; $i < \count($noFormat); ++$i) {
310-
$html = str_replace("<!-- ELEMENT {$i} -->", $noFormat[$i], $html);
311-
}
312-
313-
// include debug comment at the end
314-
if (0 != $tabs && true === $this->debugComment) {
315-
$html .= "<!-- {$tabs} open elements found -->";
316-
}
317-
318-
return $html;
319-
}
320-
321-
/**
322-
* Remove ALL line breaks and multiple white space.
323-
*/
324-
protected function killLineBreaks(string $html): string
325-
{
326-
$html = str_replace($this->newline, '', $html);
327-
328-
return preg_replace('/\s\s+/u', ' ', $html);
329-
// ? return preg_replace('/\n|\s+(\s)/u', '$1', $html);
330-
}
331-
332-
/**
333-
* Remove multiple white space, keeps line breaks.
334-
*/
335-
protected function killWhiteSpace(string $html): string
336-
{
337-
$temp = explode($this->newline, $html);
338-
for ($i = 0; $i < \count($temp); ++$i) {
339-
if (!trim($temp[$i])) {
340-
unset($temp[$i]);
341-
continue;
342-
}
343-
344-
$temp[$i] = trim($temp[$i]);
345-
$temp[$i] = preg_replace('/\s\s+/', ' ', $temp[$i]);
346-
}
347-
348-
return implode($this->newline, $temp);
349-
}
350-
351-
/**
352-
* Remove white space at the end of lines, keeps other white space and line breaks.
353-
*/
354-
protected function rTrimLines(string &$html): void
355-
{
356-
$html = preg_replace('/\s+$/m', '', $html);
357-
}
358-
359-
/**
360-
* Convert newlines according to the current OS.
361-
*/
362-
protected function convNlOs(string &$html): void
363-
{
364-
$html = preg_replace("(\r\n|\r)", $this->newline, $html);
365-
}
366-
367-
/**
368-
* Remove empty lines.
369-
*/
370-
protected function removeEmptyLines(string &$html): void
371-
{
372-
$temp = explode($this->newline, $html);
373-
$result = [];
374-
for ($i = 0; $i < \count($temp); ++$i) {
375-
if ('' == trim($temp[$i])) {
376-
continue;
377-
}
378-
$result[] = $temp[$i];
379-
}
380-
$html = implode($this->newline, $result);
381-
}
382-
383-
/**
384-
* Include configured header comment in HTML content block.
385-
*/
386-
public function includeHeaderComment(string &$html): void
387-
{
388-
$html = preg_replace('/^(-->)$/m', "\n\t" . $this->headerComment . "\n$1", $html);
389-
}
390131
}

README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,3 @@ composer install jweiland/replacer
184184
|:----------------------------------|:------------|:-----------------------------------------------------------------|:-------------------|
185185
| svgstore.enabled | boolean | Is the SVG extract & merge enabled for this template | 1 |
186186
| svgstore.fileSize | integer | Maximum file size of a SVG to include (in `[byte]`) | 50000 |
187-
188-
---
189-
##### ToDo:
190-
- Try external packages like https://github.com/ArjanSchouten/HtmlMinifier

composer.json

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,14 @@
66
"license": "GPL-2.0-or-later",
77
"require": {
88
"php": "^8.1",
9-
"typo3/cms-core": "^12.4||^13.4"
9+
"typo3/cms-core": "^12.4||^13.4",
10+
"schleuse/dindent": "^2.6"
1011
},
1112
"autoload": {
1213
"psr-4": {
1314
"HTML\\Sourceopt\\": "Classes/"
1415
}
1516
},
16-
"replace": {
17-
"maxserv/replacecontent": "*",
18-
"typo3-ter/replacecontent": "*",
19-
"typo3-ter/sourceopt": "self.version"
20-
},
2117
"require-dev": {
2218
"typo3/testing-framework": "^8.2",
2319
"friendsofphp/php-cs-fixer": "^3.3",
@@ -29,6 +25,14 @@
2925
"TYPO3\\CMS\\Core\\Tests\\": ".Build/vendor/typo3/cms/typo3/sysext/core/Tests/"
3026
}
3127
},
28+
"replace": {
29+
"maxserv/replacecontent": "*",
30+
"typo3-ter/replacecontent": "*",
31+
"typo3-ter/sourceopt": "self.version"
32+
},
33+
"suggest": {
34+
"lochmueller/staticfilecache": "A very flexible and very, very, very fast cache to TYPO3"
35+
},
3236
"config": {
3337
"vendor-dir": ".Build/vendor",
3438
"bin-dir": ".Build/bin",

0 commit comments

Comments
 (0)