Skip to content

Commit b10d2f9

Browse files
Tests: Adjust Unicode tests for consistency.
Includes:
* Adding missing `@covers` tags.
* Correcting test class names as per the naming conventions.
* Moving `wp_check_invalid_utf8()` tests to their own file, separate from `wp_scrub_utf8()`.

Follow-up to [60630], [60793], [61000].

See #64225.

git-svn-id: https://develop.svn.wordpress.org/trunk@62207 602fd350-edb4-49c9-b593-d223f7449a82
1 parent e2d6d2b commit b10d2f9

4 files changed

Lines changed: 122 additions & 42 deletions

File tree

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
<?php
2+
3+
/**
4+
* Unit tests covering WordPress’ UTF-8 handling.
5+
*
6+
* @package WordPress
7+
* @group unicode
8+
*
9+
* @covers ::wp_check_invalid_utf8
10+
*/
11+
class Tests_Unicode_WpCheckInvalidUtf8 extends WP_UnitTestCase {

	/**
	 * Verifies that WordPress can properly detect valid and invalid UTF-8.
	 *
	 * For valid input (no expected scrubbed value given) the function must return
	 * the input unchanged, both with and without stripping. For invalid input it
	 * must return an empty string when not stripping, and the expected scrubbed
	 * value when stripping.
	 *
	 * @ticket 63837
	 *
	 * @dataProvider data_utf8_test_data
	 *
	 * @param string      $bytes    Bytes as a PHP string.
	 * @param string|null $scrubbed Expected checked value, if string isn’t valid UTF-8.
	 */
	public function test_properly_checks_utf8( string $bytes, ?string $scrubbed = null ) {
		if ( null === $scrubbed ) {
			$this->assertSame(
				$bytes,
				wp_check_invalid_utf8( $bytes ),
				'Should have returned the unchanged string for valid UTF-8 input when not stripping invalid bytes.'
			);

			$this->assertSame(
				$bytes,
				wp_check_invalid_utf8( $bytes, true ),
				'Should have returned the unchanged string for valid UTF-8 input when stripping invalid bytes.'
			);
		} else {
			$this->assertSame(
				'',
				wp_check_invalid_utf8( $bytes ),
				'Should have rejected invalid input, returning an empty string when not stripping invalid bytes.'
			);

			$this->assertSame(
				$scrubbed,
				wp_check_invalid_utf8( $bytes, true ),
				'Failed to properly scrub the invalid spans of UTF-8 from the input string.'
			);
		}
	}

	/**
	 * Data provider.
	 *
	 * Reads `utf8tests.txt` line by line. Blank lines are skipped, and lines starting
	 * with `#` set the description used in the names of the data sets that follow.
	 * Every other line is split on `:` into at least three fields: a reference, a
	 * classification (`valid`, `valid hex`, or `invalid hex`), and the test data.
	 * Lines classified as `invalid hex` must carry the expected scrubbed bytes in
	 * the fifth field.
	 *
	 * Each data set is `array( $bytes, $scrubbed )`, where `$scrubbed` is `null`
	 * for valid input.
	 *
	 * @throws Exception If the test data file cannot be opened or contains malformed lines.
	 *
	 * @return Generator
	 */
	public static function data_utf8_test_data() {
		$test_file_path = __DIR__ . '/../../data/unicode/utf8tests/utf8tests.txt';
		$test_file      = fopen( $test_file_path, 'r' );

		// Fail with a clear message rather than passing `false` on to fgets().
		if ( false === $test_file ) {
			throw new Exception( "Unable to open test data file: {$test_file_path}" );
		}

		$line_number      = 0;
		$last_description = '';

		// The `finally` block releases the handle whether the generator is exhausted,
		// abandoned early, or interrupted by one of the exceptions below.
		try {
			while ( false !== ( $line = fgets( $test_file ) ) ) {
				++$line_number;

				// Strict comparison: `empty()` would also skip a line containing only "0".
				if ( '' === trim( $line ) ) {
					continue;
				}

				if ( str_starts_with( $line, '#' ) ) {
					$last_description = trim( substr( $line, 1 ) );
					continue;
				}

				$test_parts = explode( ':', $line );
				if ( count( $test_parts ) < 3 ) {
					throw new Exception( 'Wrong test data: check utf8tests.txt' );
				}

				list( $reference, $classification, $test_data ) = $test_parts;

				$reference      = trim( $reference );
				$classification = trim( $classification );
				$test_data      = trim( $test_data );

				switch ( $classification ) {
					case 'valid':
						// Test data is used verbatim as the input bytes.
						yield "{$reference} {$last_description}" => array( $test_data, null );
						break;

					case 'valid hex':
					case 'invalid hex':
						if ( 'invalid hex' === $classification && count( $test_parts ) < 5 ) {
							throw new Exception( "Test data missing expected “scrubbed” value: check utf8tests.txt:{$line_number}" );
						}

						// Hex fields are space-separated byte pairs; remove spaces before decoding.
						$bytes    = hex2bin( str_replace( ' ', '', $test_data ) );
						$scrubbed = 'invalid hex' === $classification
							? hex2bin( str_replace( ' ', '', trim( $test_parts[4] ) ) )
							: null;

						yield "{$reference} {$last_description}" => array( $bytes, $scrubbed );
						break;

					default:
						throw new Exception( "Test input file contains unrecognized input classification '{$classification}' (see utf8tests.txt): {$line}" );
				}
			}
		} finally {
			fclose( $test_file );
		}
	}
}

tests/phpunit/tests/unicode/wpHasNoncharacters.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
*
55
* @package WordPress
66
* @group unicode
7+
*
8+
* @covers ::wp_has_noncharacters
79
*/
10+
class Tests_Unicode_WpHasNoncharacters extends WP_UnitTestCase {
811

9-
class Tests_WpHasNoncharacters extends WP_UnitTestCase {
1012
/**
1113
* Ensures that a noncharacter inside a string will be properly detected.
1214
*

tests/phpunit/tests/unicode/wpIsValidUtf8.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
<?php
2+
23
/**
34
* Unit tests covering WordPress’ UTF-8 handling.
45
*
56
* @package WordPress
67
* @group unicode
8+
*
9+
* @covers ::wp_is_valid_utf8
710
*/
11+
class Tests_Unicode_WpIsValidUtf8 extends WP_UnitTestCase {
812

9-
class Tests_WpIsValidUtf8TestCase extends WP_UnitTestCase {
1013
/**
1114
* Verifies that WordPress can properly detect valid and invalid UTF-8.
1215
*

tests/phpunit/tests/unicode/wpScrubUtf8.php

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,14 @@
11
<?php
2+
23
/**
34
* Unit tests covering WordPress’ UTF-8 handling.
45
*
56
* @package WordPress
67
* @group unicode
8+
*
9+
* @covers ::wp_scrub_utf8
710
*/
8-
9-
class Tests_WpScrubUtf8 extends WP_UnitTestCase {
10-
/**
11-
* Verifies that WordPress can properly detect valid and invalid UTF-8.
12-
*
13-
* @ticket 63837
14-
*
15-
* @dataProvider data_utf8_test_data
16-
*
17-
* @param string $bytes Bytes as a PHP string.
18-
* @param string|null $scrubbed Expected checked value, if string isn’t valid UTF-8.
19-
*/
20-
public function test_properly_checks_utf8( string $bytes, ?string $scrubbed = null ) {
21-
if ( null === $scrubbed ) {
22-
$this->assertSame(
23-
$bytes,
24-
wp_check_invalid_utf8( $bytes ),
25-
'Should have returned the unchanged string for valid UTF-8 input when not stripping invalid bytes.'
26-
);
27-
28-
$this->assertSame(
29-
$bytes,
30-
wp_check_invalid_utf8( $bytes, true ),
31-
'Should have returned the unchanged string for valid UTF-8 input when stripping invalid bytes.'
32-
);
33-
} else {
34-
$this->assertSame(
35-
'',
36-
wp_check_invalid_utf8( $bytes ),
37-
'Should have rejected invalid input, returning an empty string when not stripping invalid bytes.'
38-
);
39-
40-
$this->assertSame(
41-
$scrubbed,
42-
wp_check_invalid_utf8( $bytes, true ),
43-
'Failed to properly scrub the invalid spans of UTF-8 from the input string.'
44-
);
45-
}
46-
}
11+
class Tests_Unicode_WpScrubUtf8 extends WP_UnitTestCase {
4712

4813
/**
4914
* Verifies that WordPress can properly detect valid UTF-8 while replacing invalid byte sequences.
@@ -82,7 +47,7 @@ public function test_properly_scrubs_utf8( string $bytes, ?string $scrubbed = nu
8247
* @param string $bytes Bytes as a PHP string.
8348
* @param string|null $scrubbed Expected checked value, if string isn’t valid UTF-8.
8449
*/
85-
public function test_fallback_properly_checks_utf8( string $bytes, ?string $scrubbed = null ) {
50+
public function test_fallback_properly_scrubs_utf8( string $bytes, ?string $scrubbed = null ) {
8651
if ( null === $scrubbed ) {
8752
$this->assertSame(
8853
$bytes,

0 commit comments

Comments
 (0)