Skip to content

Commit 66ee59e

Browse files
authored
refactor(message-parser): optimize code point matchers (#20)
1 parent 34f43e1 commit 66ee59e

12 files changed

Lines changed: 508 additions & 2933 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1212
- Provide `--add-missing-ids` extraction option to update source code with auto-generated identifiers
1313
- Add `Util\FormatHelper` that provides `getReader()` and `getWriter()` methods
1414
- Introduce `Format\Format` final static class for format constants
15+
- Port [@formatjs/icu-messageformat-parser](https://www.npmjs.com/package/@formatjs/icu-messageformat-parser) to FormatPHP (`FormatPHP\Icu\MessageFormat\Parser`)
1516

1617
### Changed
1718

src/Icu/MessageFormat/Parser/Util/AbstractCodePointMatcher.php

Lines changed: 0 additions & 44 deletions
This file was deleted.

src/Icu/MessageFormat/Parser/Util/CodePointHelper.php

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
use function mb_ord;
3131

3232
/**
33+
* A helper for working with code points
34+
*
3335
* @internal
3436
*/
3537
class CodePointHelper
@@ -49,32 +51,61 @@ public function __construct()
4951
$this->isPotentialElementNameChar = new IsPotentialElementNameChar();
5052
}
5153

54+
/**
55+
* Checks whether a code point is a Latin alphabet letter (A-Z or a-z)
56+
*
57+
* @see IsAlpha::matches()
58+
*/
5259
public function isAlpha(int $codepoint): bool
5360
{
5461
return $this->isAlpha->matches($codepoint);
5562
}
5663

64+
/**
65+
* Checks whether a code point is a Latin alphabet letter (A-Z or a-z) or a forward slash ("/")
66+
*
67+
* @see IsAlpha::matches()
68+
*/
5769
public function isAlphaOrSlash(int $codepoint): bool
5870
{
59-
return $this->isAlpha->matchesWithSlash($codepoint);
71+
return $codepoint === 0x002f || $this->isAlpha->matches($codepoint);
6072
}
6173

74+
/**
75+
* Checks whether a code point is in the Unicode Character Database
76+
* White_Space and Pattern_White_Space groups
77+
*
78+
* @see IsWhiteSpace::matches()
79+
*/
6280
public function isWhiteSpace(int $codepoint): bool
6381
{
6482
return $this->isWhiteSpace->matches($codepoint);
6583
}
6684

85+
/**
86+
* Checks whether a code point is in the Unicode Character Database
87+
* Pattern_Syntax group
88+
*
89+
* @see IsPatternSyntax::matches()
90+
*/
6791
public function isPatternSyntax(int $codepoint): bool
6892
{
6993
return $this->isPatternSyntax->matches($codepoint);
7094
}
7195

96+
/**
97+
* Checks whether the code point could be used in an HTML tag element name
98+
*
99+
* @see IsPotentialElementNameChar::matches()
100+
*/
72101
public function isPotentialElementNameChar(int $codepoint): bool
73102
{
74103
return $this->isPotentialElementNameChar->matches($codepoint);
75104
}
76105

77106
/**
107+
* Returns the code point for a character in a string array at a given offset
108+
*
78109
* @param string[] $stringArray
79110
*/
80111
public function charCodeAt(array $stringArray, int $offset): ?int
@@ -92,6 +123,9 @@ public function charCodeAt(array $stringArray, int $offset): ?int
92123
return null;
93124
}
94125

126+
/**
127+
* Returns the string character for a given code point
128+
*/
95129
public function fromCharCode(int $code): ?string
96130
{
97131
$char = mb_chr($code, Parser::ENCODING);
@@ -104,6 +138,8 @@ public function fromCharCode(int $code): ?string
104138
}
105139

106140
/**
141+
* Returns a string of characters for the given code points
142+
*
107143
* @throws InvalidUtf8CodePointException
108144
*/
109145
public function fromCodePoint(int ...$codePoints): string

src/Icu/MessageFormat/Parser/Util/IsAlpha.php

Lines changed: 8 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -23,85 +23,19 @@
2323
namespace FormatPHP\Icu\MessageFormat\Parser\Util;
2424

2525
/**
26+
* Checks whether a code point is a Latin alphabet letter (A-Z or a-z)
27+
*
2628
* @internal
2729
*/
28-
class IsAlpha extends AbstractCodePointMatcher
30+
class IsAlpha implements CodePointMatcherInterface
2931
{
3032
/**
31-
* Latin alphabet code points
32-
*
33-
* This array provides fast lookup of values.
34-
*/
35-
protected const CODE_POINTS = [
36-
0x0041,
37-
0x0042,
38-
0x0043,
39-
0x0044,
40-
0x0045,
41-
0x0046,
42-
0x0047,
43-
0x0048,
44-
0x0049,
45-
0x004a,
46-
0x004b,
47-
0x004c,
48-
0x004d,
49-
0x004e,
50-
0x004f,
51-
0x0050,
52-
0x0051,
53-
0x0052,
54-
0x0053,
55-
0x0054,
56-
0x0055,
57-
0x0056,
58-
0x0057,
59-
0x0058,
60-
0x0059,
61-
0x005a,
62-
0x0061,
63-
0x0062,
64-
0x0063,
65-
0x0064,
66-
0x0065,
67-
0x0066,
68-
0x0067,
69-
0x0068,
70-
0x0069,
71-
0x006a,
72-
0x006b,
73-
0x006c,
74-
0x006d,
75-
0x006e,
76-
0x006f,
77-
0x0070,
78-
0x0071,
79-
0x0072,
80-
0x0073,
81-
0x0074,
82-
0x0075,
83-
0x0076,
84-
0x0077,
85-
0x0078,
86-
0x0079,
87-
0x007a,
88-
];
89-
90-
/**
91-
* Checks whether the code point is a forward slash ("/") or an uppercase
92-
* or lowercase alphabet character
93-
*/
94-
public function matchesWithSlash(int $codepoint): bool
95-
{
96-
return $codepoint === 0x002f /* '/' */
97-
|| $this->matches($codepoint);
98-
}
99-
100-
/**
101-
* @inheritdoc
33+
* Returns true if the code point is a Latin uppercase letter A-Z or
34+
* lowercase letter a-z
10235
*/
103-
protected function getCodePoints(): array
36+
public function matches(int $codepoint): bool
10437
{
105-
return self::CODE_POINTS;
38+
return ($codepoint >= 0x0041 && $codepoint <= 0x005a)
39+
|| ($codepoint >= 0x0061 && $codepoint <= 0x007a);
10640
}
10741
}

0 commit comments

Comments
 (0)