From 45cd2e1acd5bc571ba686c7dbe0acac1071a21fa Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Sun, 17 May 2026 18:07:40 -0400 Subject: [PATCH] Refresh vendored HTML converter --- composer.lock | 8 +- .../includes/class-transform-registry.php | 178 ++++++++++++++++-- .../html-to-blocks-converter/raw-handler.php | 21 +++ .../tests/smoke-action-text-transforms.php | 10 +- .../tests/smoke-branded-link-spans.php | 23 ++- .../smoke-decorative-visual-clusters.php | 12 ++ .../smoke-repeated-card-grid-transforms.php | 14 ++ 7 files changed, 243 insertions(+), 23 deletions(-) diff --git a/composer.lock b/composer.lock index bc45868..16e565a 100644 --- a/composer.lock +++ b/composer.lock @@ -726,12 +726,12 @@ "source": { "type": "git", "url": "https://github.com/chubes4/html-to-blocks-converter.git", - "reference": "1b8e69715fd05f4a4c525980889ac50c4a8569e6" + "reference": "0108e2445ff75ca0b7028eb14fac43c6aaec000d" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/chubes4/html-to-blocks-converter/zipball/1b8e69715fd05f4a4c525980889ac50c4a8569e6", - "reference": "1b8e69715fd05f4a4c525980889ac50c4a8569e6", + "url": "https://api.github.com/repos/chubes4/html-to-blocks-converter/zipball/0108e2445ff75ca0b7028eb14fac43c6aaec000d", + "reference": "0108e2445ff75ca0b7028eb14fac43c6aaec000d", "shasum": "" }, "require": { @@ -759,7 +759,7 @@ "source": "https://github.com/chubes4/html-to-blocks-converter/tree/main", "issues": "https://github.com/chubes4/html-to-blocks-converter/issues" }, - "time": "2026-05-15T12:00:47+00:00" + "time": "2026-05-17T21:58:39+00:00" }, { "name": "fidry/console", diff --git a/vendor_prefixed/chubes4/html-to-blocks-converter/includes/class-transform-registry.php b/vendor_prefixed/chubes4/html-to-blocks-converter/includes/class-transform-registry.php index dc14590..a4cb902 100644 --- a/vendor_prefixed/chubes4/html-to-blocks-converter/includes/class-transform-registry.php +++ b/vendor_prefixed/chubes4/html-to-blocks-converter/includes/class-transform-registry.php @@ -729,7 +729,15 @@ private static function extract_balanced_li(string $html): ?string */ private static function get_button_transforms() { - return array(array('blockName' => 'core/group', 'priority' => 8, 'selector' => 'div,p', 'isMatch' => function ($element) { + return array(array('blockName' => 'core/paragraph', 'priority' => 8, 'selector' => 'div', 'isMatch' => function ($element) { + return self::is_aria_hidden_inline_span_container($element); + }, 'transform' => function ($element) { + return self::create_aria_hidden_inline_span_paragraph($element); + }), array('blockName' => 'core/paragraph', 'priority' => 8, 'selector' => 'div', 'isMatch' => function ($element) { + return self::is_visual_diagram_span_container($element); + }, 'transform' => function ($element) { + return self::create_aria_hidden_inline_span_paragraph($element); + }), array('blockName' => 'core/group', 'priority' => 8, 'selector' => 'div,p', 'isMatch' => function ($element) { return self::is_static_visual_button_container($element); }, 'transform' => function ($element) { return self::create_static_visual_button_group($element); @@ -741,6 +749,10 @@ private static function get_button_transforms() return self::is_button_anchor_container($element) || self::is_single_button_anchor_wrapper($element); }, 'transform' => function ($element) { return self::create_buttons_block_from_container($element); + }), array('blockName' => 'core/paragraph', 'priority' => 9, 'selector' => 'a', 'isMatch' => function ($element) { + return $element->get_tag_name() === 'A' && self::is_branded_inline_anchor($element); + }, 'transform' => function ($element) { + return self::create_branded_inline_anchor_paragraph($element); }), array('blockName' => 'core/buttons', 'priority' => 9, 'selector' => 'a', 'isMatch' => function ($element) { return $element->get_tag_name() === 'A' && self::is_button_like_anchor($element); }, 'transform' => function ($element) { @@ -900,6 +912,110 @@ private static function create_static_visual_button_paragraph($element): array $attributes['content'] = $element->get_inner_html(); return HTML_To_Blocks_Block_Factory::create_block('core/paragraph', $attributes); } + /** + * Checks whether an aria-hidden div is only inline span labels. + * + * Decorative rulers and visual scales commonly use direct span ticks inside an + * aria-hidden div. Convert those wrappers to a paragraph so Gutenberg does not + * insert an extra paragraph inside a group and break span-level CSS layouts. + * + * @param HTML_To_Blocks_HTML_Element $element Element to inspect. + * @return bool True when the wrapper can safely become paragraph markup. + */ + private static function is_aria_hidden_inline_span_container($element): bool + { + if ('DIV' !== $element->get_tag_name() || !$element->has_attribute('aria-hidden')) { + return \false; + } + if ('true' !== \strtolower(\trim((string) $element->get_attribute('aria-hidden')))) { + return \false; + } + if ('' === \trim($element->get_text_content())) { + return \false; + } + return self::html_contains_only_direct_spans($element->get_inner_html()); + } + /** + * Checks whether a visible diagram div contains only inline span labels. + * + * Diagram layers commonly position direct span labels. Keep those labels as + * direct paragraph content instead of wrapping them in an inserted paragraph + * inside a group, which changes diagram sizing and vertical rhythm. + * + * @param HTML_To_Blocks_HTML_Element $element Element to inspect. + * @return bool True when the wrapper can safely become paragraph markup. + */ + private static function is_visual_diagram_span_container($element): bool + { + if ('DIV' !== $element->get_tag_name() || !self::class_matches($element, '/(?:^|[-_\s])diagram(?:$|[-_\s])/i')) { + return \false; + } + if ('' === \trim($element->get_text_content())) { + return \false; + } + return self::html_contains_only_direct_spans($element->get_inner_html()); + } + /** + * Creates a paragraph preserving direct span markup from an aria-hidden div. + * + * @param HTML_To_Blocks_HTML_Element $element Span-only wrapper. + * @return array Block array. + */ + private static function create_aria_hidden_inline_span_paragraph($element): array + { + $attributes = self::get_block_support_attributes($element, array('anchor' => \true, 'class_name' => \true, 'colors' => \true, 'typography' => \true, 'spacing' => \true, 'border' => \true)); + $attributes['content'] = \trim($element->get_inner_html()); + return HTML_To_Blocks_Block_Factory::create_block('core/paragraph', $attributes); + } + /** + * Checks whether HTML contains only sibling span elements and whitespace. + * + * @param string $html Inner HTML to inspect. + * @return bool True when no non-span sibling markup or text remains. + */ + private static function html_contains_only_direct_spans(string $html): bool + { + $remaining = $html; + if (!\preg_match_all('/]*>.*?<\/span>/is', $html, $matches)) { + return \false; + } + foreach ($matches[0] as $span_html) { + $remaining = \str_replace($span_html, '', $remaining); + } + return '' === \trim($remaining); + } + /** + * Checks whether an anchor is a branded inline link with nested visual spans. + * + * @param HTML_To_Blocks_HTML_Element $element Element to inspect. + * @return bool True when the branded link can safely become paragraph markup. + */ + private static function is_branded_inline_anchor($element): bool + { + if ('A' !== $element->get_tag_name()) { + return \false; + } + $href = \trim((string) ($element->get_attribute('href') ?? '')); + if ('' === $href || '#' !== $href[0]) { + return \false; + } + if (!self::class_matches($element, '/(?:^|[-_\s])brand(?:$|[-_\s])/i')) { + return \false; + } + return \preg_match('/<\s*span\b/i', $element->get_inner_html()) === 1; + } + /** + * Creates a paragraph preserving a branded inline anchor as editable markup. + * + * @param HTML_To_Blocks_HTML_Element $element Branded anchor element. + * @return array Block array. + */ + private static function create_branded_inline_anchor_paragraph($element): array + { + $attributes = self::get_block_support_attributes($element, array('anchor' => \true)); + $attributes['content'] = \trim($element->get_outer_html()); + return HTML_To_Blocks_Block_Factory::create_block('core/paragraph', $attributes); + } /** * Checks whether a container is explicitly an action/link row. * @@ -1017,7 +1133,7 @@ private static function is_class_sensitive_cta_anchor($element): bool if (\preg_match('/(?:^|\s)(?:wp-block-button__link|wp-element-button)(?:$|\s)/i', $class_name) === 1) { return \false; } - return \preg_match('/(?:^|\s)(?:cta-(?:btn|link)|(?:btn|link)-cta)(?:$|\s)/i', $class_name) === 1; + return \preg_match('/(?:^|\s)(?:button|cta-(?:btn|link)|(?:btn|link)-cta)(?:$|\s)/i', $class_name) === 1; } /** * Creates a buttons wrapper with one button child from an anchor. @@ -1584,7 +1700,7 @@ private static function is_div_line_code_panel($element): bool return \false; } foreach ($child->get_child_elements() as $inline_child) { - if (!\in_array($inline_child->get_tag_name(), array('SPAN', 'BR', 'CODE', 'STRONG', 'B', 'EM', 'I'), \true)) { + if (!\in_array($inline_child->get_tag_name(), array('SPAN', 'BR', 'CODE', 'STRONG', 'B', 'EM', 'I', 'SMALL'), \true)) { return \false; } } @@ -2361,19 +2477,6 @@ private static function is_safe_dimension_value(string $value): bool } return \preg_match('/^[0-9.]+(?:px|em|rem|%|vw|vh|vmin|vmax|ch|ex)?$/i', $value) === 1 || \preg_match('/^calc\(\s*[0-9.]+(?:px|em|rem|%|vw|vh|vmin|vmax|ch|ex)?\s*[-+]\s*[0-9.]+(?:px|em|rem|%|vw|vh|vmin|vmax|ch|ex)?\s*\)$/i', $value) === 1; } - /** - * Checks whether a section is a high-confidence full-bleed hero wrapper. - * - * @param HTML_To_Blocks_HTML_Element $element The source element. - * @return bool True when a default flow group would lose hero centering intent. - */ - private static function is_hero_like_section($element): bool - { - if ($element->get_tag_name() !== 'SECTION' || !$element->has_attribute('class')) { - return \false; - } - return self::class_matches($element, '/(?:^|[-_\s])(?:hero|cover|banner|masthead)(?:$|[-_\s])/i'); - } /** * Applies direct border declarations to block support attributes. * @@ -2810,6 +2913,9 @@ private static function create_card_grid_child_block($child): ?array if (\in_array($tag, array('OL', 'UL'), \true)) { return self::create_list_block_from_element($child); } + if (self::is_numbered_card_label_span($child)) { + return HTML_To_Blocks_Block_Factory::create_block('core/html', array('content' => $child->get_outer_html())); + } if ('P' === $tag || 'SPAN' === $tag) { return HTML_To_Blocks_Block_Factory::create_block('core/paragraph', \array_merge(self::get_block_support_attributes($child, array('anchor' => \true, 'class_name' => \true)), array('content' => $child->get_inner_html()))); } @@ -2818,6 +2924,22 @@ private static function create_card_grid_child_block($child): ?array } return null; } + /** + * Checks whether a card child span is a numbered label that should keep its tag. + * + * @param HTML_To_Blocks_HTML_Element $child Card child element. + * @return bool True when the span should remain source HTML. + */ + private static function is_numbered_card_label_span($child): bool + { + if ('SPAN' !== $child->get_tag_name() || !$child->has_attribute('class')) { + return \false; + } + if (!self::class_matches($child, '/(?:^|[-_\s])(?:card[-_\s]?number|item[-_\s]?number|service[-_\s]?number)(?:$|[-_\s])/i')) { + return \false; + } + return \preg_match('/^\s*\d{1,3}\s*$/', $child->get_text_content()) === 1; + } /** * Gets a single whole-card anchor child when it is the card's only content. * @@ -2990,7 +3112,26 @@ private static function is_decorative_figure_with_caption($element): bool if ('FIGCAPTION' !== $caption->get_tag_name() || \trim(wp_strip_all_tags($caption->get_inner_html())) === '') { return \false; } - return \count($children) === 1 || self::is_empty_decorative_element($children[0]); + return \count($children) === 1 || self::is_empty_decorative_element($children[0]) || self::is_nested_empty_decorative_element($children[0]); + } + /** + * Checks whether an element and its descendants are inert decorative chrome. + * + * @param HTML_To_Blocks_HTML_Element $element Source element. + * @return bool True when the subtree has no meaningful content or controls. + */ + private static function is_nested_empty_decorative_element($element): bool + { + if (!$element->has_attribute('aria-hidden') || 'true' !== \strtolower($element->get_attribute('aria-hidden'))) { + return \false; + } + if (\trim(wp_strip_all_tags($element->get_inner_html())) !== '') { + return \false; + } + foreach ($element->query_selector_all('a, button, input, select, textarea, img, video, audio, iframe, object, embed, svg') as $functional_child) { + return \false; + } + return !empty($element->get_child_elements()); } /** * Checks whether an empty element carries visual background styling. @@ -3154,6 +3295,9 @@ private static function get_paragraph_transforms() if (self::is_static_visual_label($element)) { return \true; } + if ('SPAN' === $element->get_tag_name() && $element->has_attribute('class')) { + return \false; + } return \in_array($element->get_tag_name(), array('DIV', 'SPAN'), \true) && array() === $element->get_child_elements() && \trim($element->get_text_content()) !== ''; }, 'transform' => function ($element) { $content = $element->get_tag_name() === 'A' ? self::get_paragraph_anchor_content($element) : $element->get_inner_html(); diff --git a/vendor_prefixed/chubes4/html-to-blocks-converter/raw-handler.php b/vendor_prefixed/chubes4/html-to-blocks-converter/raw-handler.php index 9b35d72..92a7f9d 100644 --- a/vendor_prefixed/chubes4/html-to-blocks-converter/raw-handler.php +++ b/vendor_prefixed/chubes4/html-to-blocks-converter/raw-handler.php @@ -140,6 +140,9 @@ function html_to_blocks_convert($html, $args = array()) if (empty(\trim($html))) { return array(); } + if (html_to_blocks_is_standalone_hash_anchor_fragment($html)) { + $html = html_to_blocks_normalise_blocks($html); + } $collect_metrics = \function_exists('has_action') && \has_action('html_to_blocks_convert_metrics'); $metrics = null; $convert_started = 0.0; @@ -745,6 +748,24 @@ function html_to_blocks_parse_shortcode($shortcode) } return HTML_To_Blocks_Block_Factory::create_block('core/shortcode', array('text' => $shortcode)); } +/** + * Checks whether an HTML fragment is one same-page hash anchor. + * + * @param string $html HTML fragment. + * @return bool True when the fragment is a standalone hash anchor. + */ +function html_to_blocks_is_standalone_hash_anchor_fragment(string $html): bool +{ + $element = HTML_To_Blocks_HTML_Element::from_html($html); + if (!$element || 'A' !== $element->get_tag_name()) { + return \false; + } + $href = \trim((string) $element->get_attribute('href')); + if ('' === $href || '#' !== $href[0]) { + return \false; + } + return \trim($element->get_outer_html()) === \trim($html); +} /** * Normalises blocks in HTML - wraps inline content in paragraphs * diff --git a/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-action-text-transforms.php b/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-action-text-transforms.php index 98a94ee..30db235 100644 --- a/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-action-text-transforms.php +++ b/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-action-text-transforms.php @@ -26,7 +26,7 @@ function esc_url($value) if (!\function_exists('BlockFormatBridge\Vendor\wp_strip_all_tags')) { function wp_strip_all_tags($value) { - return wp_strip_all_tags((string) $value); + return \strip_tags((string) $value); } } class WP_Block_Type_Registry @@ -154,6 +154,9 @@ public function get_registered($name) $class_sensitive_cta_row = new HTML_To_Blocks_HTML_Element('div', ['class' => 'cta-actions'], '', 'Browse the docs'); $class_sensitive_cta_row_transform = $find_transform($class_sensitive_cta_row); $smoke_assert('core/buttons' !== $class_sensitive_cta_row_transform['blockName'], 'class-sensitive-cta-row-avoids-buttons'); +$button_variant_row = new HTML_To_Blocks_HTML_Element('div', ['class' => 'hero-actions cta'], '', 'Find a classPlan your first visit'); +$button_variant_row_transform = $find_transform($button_variant_row); +$smoke_assert('core/buttons' !== $button_variant_row_transform['blockName'], 'class-sensitive-button-variant-row-avoids-buttons'); $ordinary_link = new HTML_To_Blocks_HTML_Element('p', [], '

Read more.

', 'Read more.'); $ordinary_link_transform = $find_transform($ordinary_link); $smoke_assert('core/paragraph' === $ordinary_link_transform['blockName'], 'ordinary-link-stays-paragraph'); @@ -227,6 +230,11 @@ public function get_registered($name) $onclick_submit_button = new HTML_To_Blocks_HTML_Element('button', ['class' => 'tab-btn', 'type' => 'submit', 'onclick' => 'submitForm()'], '', 'Submit'); $smoke_assert($find_transform($onclick_submit_button) === null, 'onclick-submit-button-falls-through'); // ------------------------------------------------------------------------- +// Spans: classed leaf spans preserve source display semantics as fallback HTML. +// ------------------------------------------------------------------------- +$classed_leaf_span = new HTML_To_Blocks_HTML_Element('span', ['class' => 'service-number'], '01', '01'); +$smoke_assert($find_transform($classed_leaf_span) === null, 'classed-leaf-span-falls-through'); +// ------------------------------------------------------------------------- // Labels: static visual UI labels become text, real form labels fall through. // ------------------------------------------------------------------------- $visual_label = new HTML_To_Blocks_HTML_Element('label', ['class' => 'inspector-label'], '', 'Type'); diff --git a/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-branded-link-spans.php b/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-branded-link-spans.php index aec5239..396ed25 100644 --- a/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-branded-link-spans.php +++ b/vendor_prefixed/chubes4/html-to-blocks-converter/tests/smoke-branded-link-spans.php @@ -115,7 +115,7 @@ function serialize_blocks(array $blocks): string $failures[] = 'FAIL [' . $label . ']' . ('' !== $detail ? ': ' . $detail : ''); } }; -$brand_cases = ['simple-span-brand' => ['html' => 'Studio Code', 'snippets' => ['href="#top"', 'aria-label="Studio Code home"', 'class="brand"', '', 'Studio Code']], 'formatted-span-brand' => ['html' => 'WicksteadRefill Works', 'snippets' => ['href="#top"', 'aria-label="Wickstead Refill Works home"', 'class="brand"', '', 'Wickstead', 'Refill Works']], 'div-wrapped-logo-brand' => ['html' => '', 'snippets' => ['href="#"', 'class="footer-logo"', '', 'Relay Atlas']]]; +$brand_cases = ['simple-span-brand' => ['html' => 'Studio Code', 'snippets' => ['href="#top"', 'aria-label="Studio Code home"', 'class="brand"', '', 'Studio Code']], 'formatted-span-brand' => ['html' => 'WicksteadRefill Works', 'snippets' => ['href="#top"', 'aria-label="Wickstead Refill Works home"', 'class="brand"', '', 'Wickstead', 'Refill Works']], 'formatted-span-brand-with-source-spacing' => ['html' => ' Wickstead Refill Works ', 'snippets' => ['href="#top"', 'aria-label="Wickstead Refill Works home"', 'class="brand"', '', 'Wickstead', 'Refill Works']], 'brand-with-small-tagline' => ['html' => 'Studio Tattoo Studio', 'snippets' => ['href="#top"', 'aria-label="Studio home"', 'class="brand"', 'Studio', 'Tattoo Studio']], 'footer-brand-without-aria-label' => ['html' => ' WicksteadRefill Works ', 'snippets' => ['href="#top"', 'class="brand footer-brand"', '', 'Wickstead', 'Refill Works']], 'footer-brand-reversed-class-order' => ['html' => 'WicksteadRefill Works', 'snippets' => ['href="#top"', 'class="footer-brand brand"', '', 'Wickstead', 'Refill Works']], 'div-wrapped-logo-brand' => ['html' => '', 'snippets' => ['href="#"', 'class="footer-logo"', '', 'Relay Atlas']]]; foreach ($brand_cases as $case_name => $case) { foreach ([$case['html'], '' . $case['html'] . ''] as $index => $html) { $serialized = serialize_blocks(html_to_blocks_raw_handler(['HTML' => $html])); @@ -129,6 +129,27 @@ function serialize_blocks(array $blocks): string $assert(!\str_contains($serialized, ' 'External'])); +$assert(\str_contains($external_brand, ''), 'aria-hidden-span-ruler-avoids-core-html', $aria_hidden_span_ruler); +$assert(\str_contains($aria_hidden_span_ruler, ''), 'visible-diagram-span-container-avoids-core-html', $visible_diagram_span_group); +$assert(\str_contains($visible_diagram_span_group, '