From ce0d465f68751cef34e7577b936c97676fbd8b6f Mon Sep 17 00:00:00 2001 From: Gray Gilmore Date: Wed, 6 May 2026 14:00:13 -0700 Subject: [PATCH] Make whitespace filters Unicode-aware Previously we were only leveraging Ruby's `String#strip` to handle the logic in these filters, but that only covers ASCII whitespace. When rendering Liquid templates into HTML it would be confusing for these filters not to strip *all* whitespace. Additionally, it's helpful when trying to compare two values in, say, a Liquid conditional. --- lib/liquid/standardfilters.rb | 21 +++++++++++--- test/integration/standard_filter_test.rb | 35 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/lib/liquid/standardfilters.rb b/lib/liquid/standardfilters.rb index ed6141566..e875c80b5 100644 --- a/lib/liquid/standardfilters.rb +++ b/lib/liquid/standardfilters.rb @@ -36,6 +36,19 @@ module StandardFilters %r{}m, ) STRIP_HTML_TAGS = /<.*?>/m + # Use POSIX whitespace matching so filters handle whitespace beyond Ruby String#strip's ASCII set. + WHITESPACE_LEFT = /\A[[:space:]]+/ + WHITESPACE_RIGHT = /[[:space:]]+\z/ + WHITESPACE_EDGES = Regexp.union(WHITESPACE_LEFT, WHITESPACE_RIGHT) + # Optimized runs regex: matches 2 or more consecutive [[:space:]] characters OR a single [[:space:]] + # that isn't already `" "` (a lone ASCII space needs no replacement). + WHITESPACE_RUNS = /([[:space:]]{2,}|[[[:space:]]&&[^ ]])/ + private_constant( + :WHITESPACE_EDGES, + :WHITESPACE_LEFT, + :WHITESPACE_RIGHT, + :WHITESPACE_RUNS, + ) class << self def try_coerce_encoding(input, encoding:) @@ -312,7 +325,7 @@ def split(input, pattern) def squish(input) return if input.nil? 
- Utils.to_s(input).strip.gsub(/\s+/, ' ') + Utils.to_s(input).gsub(WHITESPACE_RUNS, ' ').strip end # @liquid_public_docs @@ -324,7 +337,7 @@ def squish(input) # @liquid_return [string] def strip(input) input = Utils.to_s(input) - input.strip + input.gsub(WHITESPACE_EDGES, ' ').strip end # @liquid_public_docs @@ -336,7 +349,7 @@ def strip(input) # @liquid_return [string] def lstrip(input) input = Utils.to_s(input) - input.lstrip + input.gsub(WHITESPACE_LEFT, ' ').lstrip end # @liquid_public_docs @@ -348,7 +361,7 @@ def lstrip(input) # @liquid_return [string] def rstrip(input) input = Utils.to_s(input) - input.rstrip + input.gsub(WHITESPACE_RIGHT, ' ').rstrip end # @liquid_public_docs diff --git a/test/integration/standard_filter_test.rb b/test/integration/standard_filter_test.rb index 94097ae1f..c5f69c7ff 100644 --- a/test/integration/standard_filter_test.rb +++ b/test/integration/standard_filter_test.rb @@ -169,6 +169,15 @@ def test_squish_filter \t boo " | squish }})).render) assert_equal("", Liquid::Template.parse('{{ nil | squish }}').render) assert_equal("", Liquid::Template.parse('{{ " " | squish }}').render) + + unicode_spaces = "\u00A0\u202F\u2009\u2007" + + assert_template_result( + "foo bar boo", + "{{ source | squish }}", + { 'source' => "#{unicode_spaces}foo\u202F\u2009bar\t\n\u2007boo#{unicode_spaces}" }, + ) + assert_template_result("\u200Bfoo\u200B", "{{ source | squish }}", { 'source' => "\u200Bfoo\u200B" }) end def test_escape @@ -703,16 +712,42 @@ def test_pipes_in_string_arguments def test_strip assert_template_result('ab c', "{{ source | strip }}", { 'source' => " ab c " }) assert_template_result('ab c', "{{ source | strip }}", { 'source' => " \tab c \n \t" }) + + unicode_spaces = "\u00A0\u202F\u2009\u2007" + + assert_template_result( + 'ab c', + "{{ source | strip }}", + { 'source' => "#{unicode_spaces}ab c#{unicode_spaces}" }, + ) + assert_template_result("a\u00A0b\u202Fc", "{{ source | strip }}", { 'source' => "a\u00A0b\u202Fc" }) + 
assert_template_result("\u200Bfoo\u200B", "{{ source | strip }}", { 'source' => "\u200Bfoo\u200B" }) end def test_lstrip assert_template_result('ab c ', "{{ source | lstrip }}", { 'source' => " ab c " }) assert_template_result("ab c \n \t", "{{ source | lstrip }}", { 'source' => " \tab c \n \t" }) + + unicode_spaces = "\u00A0\u202F\u2009\u2007" + + assert_template_result( + "ab c#{unicode_spaces}", + "{{ source | lstrip }}", + { 'source' => "#{unicode_spaces}ab c#{unicode_spaces}" }, + ) end def test_rstrip assert_template_result(" ab c", "{{ source | rstrip }}", { 'source' => " ab c " }) assert_template_result(" \tab c", "{{ source | rstrip }}", { 'source' => " \tab c \n \t" }) + + unicode_spaces = "\u00A0\u202F\u2009\u2007" + + assert_template_result( + "#{unicode_spaces}ab c", + "{{ source | rstrip }}", + { 'source' => "#{unicode_spaces}ab c#{unicode_spaces}" }, + ) end def test_strip_newlines