Skip to content

Commit 56ead0f

Browse files
committed
feat: allow passing custom parsers to the extractor
1 parent c89c21e commit 56ead0f

18 files changed

Lines changed: 647 additions & 198 deletions

src/ConfigInterface.php

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
namespace FormatPHP;
2424

2525
use FormatPHP\Extractor\IdInterpolator;
26-
use FormatPHP\Extractor\IdInterpolatorOptions;
2726
use FormatPHP\Intl\LocaleInterface;
2827

2928
/**
@@ -39,32 +38,7 @@ public function getDefaultLocale(): ?LocaleInterface;
3938
/**
4039
* Returns a pattern that defines how to generate missing message IDs
4140
*
42-
* If message descriptors are missing the id property, we will use this
43-
* pattern to automatically generate IDs for them.
44-
*
45-
* The pattern follows this format:
46-
*
47-
* [hashAlgorithm:contenthash:encodingAlgorithm:length]
48-
*
49-
* When passing this value, provide the hashAlgorithm, encodingAlgorithm,
50-
* and length, and formatphp will calculate the contenthash.
51-
*
52-
* For example, if you wish to use `haval160,4` as the hashing algorithm,
53-
* `hex` as the encoding algorithm, with a length of 10, you would pass
54-
* the following string:
55-
*
56-
* [haval160,4:contenthash:hex:10]
57-
*
58-
* See <https://www.php.net/hash_algos> for available hashing algorithms.
59-
*
60-
* The following binary-to-text encodings are supported:
61-
*
62-
* - `base64`
63-
* - `base64url`
64-
* - `hex`
65-
*
6641
* @see IdInterpolator
67-
* @see IdInterpolatorOptions
6842
*/
6943
public function getIdInterpolatorPattern(): string;
7044

src/Console/Command/ExtractCommand.php

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
use function array_map;
4141
use function array_merge;
42+
use function array_unique;
4243
use function explode;
4344
use function getcwd;
4445

@@ -47,8 +48,6 @@
4748
*/
4849
class ExtractCommand extends AbstractCommand
4950
{
50-
private const DEFAULT_FUNCTION_NAMES = ['formatMessage'];
51-
5251
private const STANDARD_IGNORES = [
5352
'.arch-params',
5453
'.bzr',
@@ -130,6 +129,13 @@ protected function configure(): void
130129
'Comma-separated list of additional function names to search '
131130
. 'for when extracting messages.',
132131
)
132+
->addOption(
133+
'parser',
134+
'p',
135+
InputOption::VALUE_IS_ARRAY | InputOption::VALUE_REQUIRED,
136+
'Parser name or path to a parser script to apply additional '
137+
. 'parsing in addition to the default PHP parsing.',
138+
)
133139
->addOption(
134140
'ignore',
135141
null,
@@ -201,6 +207,10 @@ private function buildOptions(InputInterface $input): MessageExtractorOptions
201207
$idInterpolationPattern = $input->getOption('id-interpolation-pattern');
202208
$options->idInterpolationPattern = $idInterpolationPattern ?? IdInterpolator::DEFAULT_ID_INTERPOLATION_PATTERN;
203209

210+
/** @var string[] $parsers */
211+
$parsers = (array) $input->getOption('parser');
212+
$options->parsers = array_unique(array_merge($options->parsers, $parsers));
213+
204214
/** @var string[] $ignore */
205215
$ignore = (array) $input->getOption('ignore');
206216
$options->ignore = array_merge(self::STANDARD_IGNORES, $ignore);
@@ -213,12 +223,10 @@ private function buildOptions(InputInterface $input): MessageExtractorOptions
213223
$options->throws = (bool) $input->getOption('throws');
214224
$options->preserveWhitespace = (bool) $input->getOption('preserve-whitespace');
215225

216-
/** @var string $additionalFunctionNames */
217-
$additionalFunctionNames = $input->getOption('additional-function-names') ?? '';
218-
$options->additionalFunctionNames = array_merge(
219-
self::DEFAULT_FUNCTION_NAMES,
220-
array_map('trim', explode(',', $additionalFunctionNames)),
221-
);
226+
/** @var string $inputFunctionNames */
227+
$inputFunctionNames = $input->getOption('additional-function-names') ?? '';
228+
$additionalFunctionNames = array_map('trim', explode(',', $inputFunctionNames));
229+
$options->functionNames = array_unique(array_merge($options->functionNames, $additionalFunctionNames));
222230

223231
return $options;
224232
}

src/Extractor/IdInterpolator.php

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
namespace FormatPHP\Extractor;
2424

2525
use Closure;
26-
use FormatPHP\ConfigInterface;
2726
use FormatPHP\DescriptorInterface;
2827
use FormatPHP\Exception\InvalidArgumentException;
2928
use FormatPHP\Exception\UnableToGenerateMessageIdException;
@@ -46,7 +45,29 @@
4645
/**
4746
* IdInterpolator supports generation of message descriptor IDs
4847
*
49-
* @see ConfigInterface::getIdInterpolatorPattern()
48+
* If message descriptors are missing the id property, we will use this
49+
* pattern to automatically generate IDs for them.
50+
*
51+
* The pattern follows this format:
52+
*
53+
* [hashAlgorithm:contenthash:encodingAlgorithm:length]
54+
*
55+
* When passing this value, provide the hashAlgorithm, encodingAlgorithm,
56+
* and length, and formatphp will calculate the contenthash.
57+
*
58+
* For example, if you wish to use `haval160,4` as the hashing algorithm,
59+
* `hex` as the encoding algorithm, with a length of 10, you would pass
60+
* the following string:
61+
*
62+
* [haval160,4:contenthash:hex:10]
63+
*
64+
* See <https://www.php.net/hash_algos> for available hashing algorithms.
65+
*
66+
* The following binary-to-text encodings are supported:
67+
*
68+
* - `base64`
69+
* - `base64url`
70+
* - `hex`
5071
*/
5172
class IdInterpolator
5273
{
@@ -62,8 +83,6 @@ class IdInterpolator
6283
* If the message descriptor does not have a default message, we cannot
6384
* generate an ID, so we throw `UnableToGenerateMessageIdException`.
6485
*
65-
* @see ConfigInterface::getIdInterpolatorPattern()
66-
*
6786
* @throws InvalidArgumentException
6887
* @throws UnableToGenerateMessageIdException
6988
*/

src/Extractor/IdInterpolatorOptions.php

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,30 @@
2222

2323
namespace FormatPHP\Extractor;
2424

25-
use FormatPHP\ConfigInterface;
26-
2725
/**
2826
* IdInterpolator options
2927
*
30-
* @see ConfigInterface::getIdInterpolatorPattern()
28+
* @see IdInterpolator
3129
*/
3230
class IdInterpolatorOptions
3331
{
3432
private const DEFAULT_HASHING_ALGORITHM = 'sha512';
3533
private const DEFAULT_ENCODING_ALGORITHM = 'base64';
3634
private const DEFAULT_LENGTH = 6;
3735

36+
/**
37+
* The hashing algorithm to use when creating the ID
38+
*/
3839
public string $hashingAlgorithm;
40+
41+
/**
42+
* The encoding algorithm to use to encode the ID
43+
*/
3944
public string $encodingAlgorithm;
45+
46+
/**
47+
* The length of the ID; if the generated ID is longer than this, it will be truncated
48+
*/
4049
public int $length;
4150

4251
public function __construct(

src/Extractor/MessageExtractor.php

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
use FormatPHP\Exception\UnableToWriteFileException;
3232
use FormatPHP\Extractor\Parser\Descriptor\PhpParser;
3333
use FormatPHP\Extractor\Parser\DescriptorParserInterface;
34+
use FormatPHP\Extractor\Parser\ParserErrorCollection;
3435
use FormatPHP\Format\Writer\FormatPHPWriter;
3536
use FormatPHP\Format\Writer\SimpleWriter;
3637
use FormatPHP\Format\Writer\SmartlingWriter;
@@ -67,11 +68,7 @@ class MessageExtractor
6768
private Globber $globber;
6869
private LoggerInterface $logger;
6970
private MessageExtractorOptions $options;
70-
71-
/**
72-
* @var DescriptorParserInterface[]
73-
*/
74-
private array $parsers;
71+
private ParserErrorCollection $errors;
7572

7673
/**
7774
* @throws LogicException
@@ -86,7 +83,7 @@ public function __construct(
8683
$this->logger = $logger;
8784
$this->globber = $globber;
8885
$this->file = $file;
89-
$this->parsers = $this->loadParsers();
86+
$this->errors = new ParserErrorCollection();
9087
}
9188

9289
/**
@@ -135,14 +132,19 @@ public function process(array $files): void
135132
$this->writeOutput($this->prepareOutput($formatter, $descriptors));
136133
}
137134

135+
public function getErrors(): ParserErrorCollection
136+
{
137+
return $this->errors;
138+
}
139+
138140
/**
139141
* @throws UnableToProcessFileException
140142
*/
141143
private function parse(DescriptorCollection $descriptors, string $filePath): DescriptorCollection
142144
{
143-
foreach ($this->parsers as $parser) {
145+
foreach ($this->getDescriptorParsers() as $parser) {
144146
/** @var DescriptorCollection $descriptors */
145-
$descriptors = $descriptors->merge($parser->parse($filePath));
147+
$descriptors = $descriptors->merge($parser($filePath, $this->options, $this->errors));
146148
}
147149

148150
return $descriptors;
@@ -153,25 +155,50 @@ private function parse(DescriptorCollection $descriptors, string $filePath): Des
153155
*
154156
* @throws LogicException
155157
*/
156-
private function loadParsers(): array
158+
private function getDescriptorParsers(): array
157159
{
158160
$parsers = [];
159-
$parsers[] = $this->getPhpParser();
161+
162+
foreach ($this->options->parsers as $parser) {
163+
$parsers[] = $this->loadDescriptorParser($parser);
164+
}
160165

161166
return $parsers;
162167
}
163168

164169
/**
165170
* @throws LogicException
166171
*/
167-
private function getPhpParser(): PhpParser
172+
private function loadDescriptorParser(string $parserNameOrScript): DescriptorParserInterface
168173
{
169-
return new PhpParser(
170-
$this->file,
171-
$this->options->additionalFunctionNames,
172-
$this->options->pragma,
173-
$this->options->preserveWhitespace,
174-
);
174+
switch (strtolower($parserNameOrScript)) {
175+
case 'php':
176+
return new PhpParser($this->file);
177+
}
178+
179+
if (class_exists($parserNameOrScript) && is_a($parserNameOrScript, DescriptorParserInterface::class, true)) {
180+
$parser = new $parserNameOrScript();
181+
} else {
182+
/** @var Closure(string,MessageExtractorOptions,ParserErrorCollection):DescriptorCollection | null $parser */
183+
$parser = $this->file->loadClosureFromScript($parserNameOrScript);
184+
}
185+
186+
if ($parser instanceof DescriptorParserInterface) {
187+
return $parser;
188+
}
189+
190+
if (is_callable($parser)) {
191+
return $this->createInvokableDescriptorParser($parser);
192+
}
193+
194+
throw new InvalidArgumentException(sprintf(
195+
'The parser provided is not a known descriptor parser, an instance of '
196+
. '%s, or a callable of the shape `callable(string,%s,%s):%s`.',
197+
DescriptorParserInterface::class,
198+
MessageExtractorOptions::class,
199+
ParserErrorCollection::class,
200+
DescriptorCollection::class,
201+
));
175202
}
176203

177204
/**
@@ -256,4 +283,25 @@ private function writeOutput(string $output): void
256283

257284
$this->file->writeContents($stream, $output);
258285
}
286+
287+
private function createInvokableDescriptorParser(callable $parser): DescriptorParserInterface
288+
{
289+
return new class ($parser) implements DescriptorParserInterface {
290+
private Closure $parser;
291+
292+
public function __construct(callable $parser)
293+
{
294+
$this->parser = Closure::fromCallable($parser);
295+
}
296+
297+
public function __invoke(
298+
string $filePath,
299+
MessageExtractorOptions $options,
300+
ParserErrorCollection $errors
301+
): DescriptorCollection {
302+
/** @var DescriptorCollection */
303+
return ($this->parser)($filePath, $options, $errors);
304+
}
305+
};
306+
}
259307
}

src/Extractor/MessageExtractorOptions.php

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,74 @@
2727
*/
2828
class MessageExtractorOptions
2929
{
30+
private const DEFAULT_FUNCTION_NAMES = ['formatMessage'];
31+
private const DEFAULT_PARSERS = ['php'];
32+
33+
/**
34+
* Formatter name or path to a formatter script that controls the shape
35+
* of the JSON produced for $outFile
36+
*/
3037
public ?string $format = null;
38+
39+
/**
40+
* Target file path to save the JSON output file of all translations
41+
* extracted from the files
42+
*/
3143
public ?string $outFile = null;
44+
45+
/**
46+
* If message descriptors are missing the id property, we will use this
47+
* pattern to automatically generate IDs for them
48+
*
49+
* @see IdInterpolator
50+
*/
3251
public string $idInterpolationPattern = IdInterpolator::DEFAULT_ID_INTERPOLATION_PATTERN;
52+
53+
/**
54+
* Whether to extract metadata for the source files
55+
*
56+
* If true, the extracted descriptors will each include `file`, `start`,
57+
* `end`, `line`, and `col` properties.
58+
*/
3359
public bool $extractSourceLocation = false;
60+
61+
/**
62+
* Whether to throw an exception when failing to process any file in the batch
63+
*
64+
* The default is to emit warnings, while continuing to process the rest
65+
* of the files.
66+
*/
3467
public bool $throws = false;
68+
69+
/**
70+
* Allows parsing of an additional custom pragma to include custom metadata in
71+
* the extracted messages
72+
*/
3573
public ?string $pragma = null;
74+
75+
/**
76+
* Whether to preserve whitespace and newlines in extracted messages
77+
*/
3678
public bool $preserveWhitespace = false;
3779

3880
/**
81+
* Function and method names to parse from the application source code
82+
*
3983
* @var string[]
4084
*/
41-
public array $additionalFunctionNames = [];
85+
public array $functionNames = self::DEFAULT_FUNCTION_NAMES;
4286

4387
/**
4488
* Glob file path patterns to ignore
4589
*
4690
* @var string[]
4791
*/
4892
public array $ignore = [];
93+
94+
/**
95+
* Parsers to use for extracting format messages from application source code
96+
*
97+
* @var string[]
98+
*/
99+
public array $parsers = self::DEFAULT_PARSERS;
49100
}

0 commit comments

Comments
 (0)