@@ -258,6 +258,58 @@ def hello:
258258 expect ( helper . sanitize ( '<script/>' ) ) . to be_empty
259259 end
260260 end
261+
# Examples for how `helper.sanitize_ckeditor_rich_text` treats Unicode
# combining marks (\p{M}). As the example names describe it, runs of up to
# three marks on a base character are kept, and longer "Zalgo" runs are
# shortened to three.
describe '#sanitize_ckeditor_rich_text' do
  # Ten consecutive combining marks, U+0300 through U+0309.
  let(:combining_run) { (0x0300..0x0309).map { |cp| cp.chr(Encoding::UTF_8) }.join }

  it 'leaves plain text without combining marks unchanged' do
    html = '<p>Hello World</p>'
    expect(helper.sanitize_ckeditor_rich_text(html)).to include('Hello World')
  end

  it 'preserves single combining marks (normal accented characters)' do
    html = "<p>Cafe\u0301 and Nin\u0303o</p>"
    result = helper.sanitize_ckeditor_rich_text(html)
    expect(result).to include("e\u0301") # é via combining mark
    expect(result).to include("n\u0303") # ñ via combining mark
  end

  # The fixture is Vietnamese written with decomposed diacritics, where one
  # or two marks per base letter is ordinary text rather than abuse.
  it 'preserves multiple combining marks (e.g. for Vietnamese)' do
    html = "<p>Ta\u0302\u0301t ca\u0309 mo\u0323i ngu\u031bo\u031b\u0300i sinh ra \u0111e\u0302\u0300u</p>"
    result = helper.sanitize_ckeditor_rich_text(html)
    expect(result).to include("\u0302\u0301") # 2 combining marks on 'a'
    expect(result).to include("\u0309")       # 1 combining mark on 'a'
    expect(result).to include("\u0323")       # 1 combining mark on 'o'
    expect(result).to include("\u031b\u0300") # 2 combining marks on 'o'
    expect(result).to include("\u0302\u0300") # 2 combining marks on 'e'
  end

  it 'preserves text with exactly 3 combining marks' do
    html = "<p>e\u0300\u0301\u0302</p>"
    result = helper.sanitize_ckeditor_rich_text(html)
    expect(result).to match(/\p{M}{3}/)
    expect(result).not_to match(/\p{M}{4}/)
  end

  it 'collapses 4 combining marks down to 3' do
    html = "<p>e\u0300\u0301\u0302\u0303</p>"
    result = helper.sanitize_ckeditor_rich_text(html)
    # Check the lower bound too: without it, stripping every mark would pass.
    expect(result).to match(/\p{M}{3}/)
    expect(result).not_to match(/\p{M}{4,}/)
  end

  it 'collapses Zalgo-style text with many combining marks down to 3' do
    # 10 combining marks on a single base character
    html = "<p>e#{combining_run}</p>"
    result = helper.sanitize_ckeditor_rich_text(html)
    expect(result).to match(/\p{M}{3}/)
    expect(result).not_to match(/\p{M}{4,}/)
  end

  it 'handles multiple Zalgo sequences in the same text node' do
    html = "<p>a#{combining_run} and b#{combining_run}</p>"
    result = helper.sanitize_ckeditor_rich_text(html)
    # Both runs must be shortened independently: two runs of three, none longer.
    expect(result.scan(/\p{M}{3}/).size).to eq(2)
    expect(result).not_to match(/\p{M}{4,}/)
  end
end
261313 end
262314
263315 describe 'user display helper' do
0 commit comments