Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Command/AnalyserRunner.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use function array_unshift;
use function array_values;
use function count;
use function filesize;
use function function_exists;
use function is_file;
use function memory_get_peak_usage;
Expand Down Expand Up @@ -73,7 +74,7 @@ public function runAnalyser(
}

if (!$debug && $allowParallel && function_exists('proc_open')) {
$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files);
$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files, static fn (string $file): int => (int) @filesize($file));

$mainScript = null;
if (isset($_SERVER['argv'][0]) && is_file($_SERVER['argv'][0])) {
Expand Down
3 changes: 2 additions & 1 deletion src/Command/FixerWorkerRunner.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
use function array_key_exists;
use function count;
use function filemtime;
use function filesize;
use function in_array;
use function is_file;
use function memory_get_peak_usage;
Expand Down Expand Up @@ -351,7 +352,7 @@ private function runAnalyser(LoopInterface $loop, array $files, array $allAnalys
));
}

$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files);
$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files, static fn (string $file): int => (int) @filesize($file));
$mainScript = null;
if (isset($_SERVER['argv'][0]) && is_file($_SERVER['argv'][0])) {
$mainScript = $_SERVER['argv'][0];
Expand Down
23 changes: 21 additions & 2 deletions src/Parallel/Scheduler.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
use PHPStan\DependencyInjection\AutowiredParameter;
use PHPStan\DependencyInjection\AutowiredService;
use PHPStan\Diagnose\DiagnoseExtension;
use function array_chunk;
use function array_values;
use function ceil;
use function count;
use function floor;
use function max;
use function min;
use function sprintf;
use function usort;

#[AutowiredService]
final class Scheduler implements DiagnoseExtension
Expand All @@ -38,13 +40,30 @@ public function __construct(

/**
* @param array<string> $files
* @param callable(string): int $fileSizeCallback
*/
public function scheduleWork(
int $cpuCores,
array $files,
callable $fileSizeCallback,
): Schedule
{
$jobs = array_chunk($files, $this->jobSize);
// sort by size and deal files round-robin across jobs so every job mixes
// large and small files - chunking a sorted list would concentrate the
// heaviest files into a single job and create one long-running straggler
$fileSizes = [];
foreach ($files as $file) {
$fileSizes[$file] = $fileSizeCallback($file);
}
usort($files, static fn (string $a, string $b): int => $fileSizes[$b] <=> $fileSizes[$a]);

$numberOfJobs = (int) ceil(count($files) / $this->jobSize);
$stripedJobs = [];
foreach ($files as $i => $file) {
$stripedJobs[$i % $numberOfJobs][] = $file;
}

$jobs = array_values($stripedJobs);
Comment on lines +60 to +66

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this last block be simplified to `$jobs = array_chunk($files, $this->jobSize);` again?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can't, unfortunately — that's the one shape I measured as actively harmful. With the list sorted by size, array_chunk() puts the ~20 heaviest files together in the first job, and whichever worker picks it up becomes a straggler the whole run waits for: sort + chunk measured +46% cold wall time vs. unsorted chunking on the src/Type self-analysis benchmark, while sort + round-robin striping is what produces the −21% to −23% change in cold wall time. The striping is the point, not an artifact; the new testHeaviestFilesAreSpreadAcrossJobs test pins exactly this property.

$numberOfProcesses = min(
max((int) floor(count($jobs) / $this->minimumNumberOfJobsPerProcess), 1),
$cpuCores,
Expand Down
62 changes: 56 additions & 6 deletions tests/PHPStan/Parallel/SchedulerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;
use function array_fill;
use function array_keys;
use function array_map;
use function array_merge;
use function count;
use function sort;
use function sprintf;

class SchedulerTest extends TestCase
{
Expand All @@ -21,7 +25,7 @@ public static function dataSchedule(): array
50,
115,
1,
[50, 50, 15],
[39, 38, 38],
],
[
16,
Expand All @@ -30,7 +34,7 @@ public static function dataSchedule(): array
30,
124,
5,
[30, 30, 30, 30, 4],
[25, 25, 25, 25, 24],
],
[
16,
Expand All @@ -39,7 +43,7 @@ public static function dataSchedule(): array
30,
124,
3,
[30, 30, 30, 30, 4],
[25, 25, 25, 25, 24],
],
[
16,
Expand All @@ -48,7 +52,7 @@ public static function dataSchedule(): array
10,
298,
16,
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9],
],
[
16,
Expand All @@ -57,7 +61,7 @@ public static function dataSchedule(): array
30,
124,
2,
[30, 30, 30, 30, 4],
[25, 25, 25, 25, 24],

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the tests could be more expressive here. Could we supply differently sized files here, or maybe change the Scheduler signature so that we provide file sizes from the outside and the class is still unit testable?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — scheduleWork() now takes a callable(string): int for the file sizes, so the class is unit-testable without touching the filesystem; both production callers pass fn ($file) => (int) @filesize($file). Added two expressive tests with synthetic sizes: one asserts the exact striped layout for six distinctly-sized files (the three heaviest files end up in three different jobs, each paired with a light file), the other checks every file is scheduled exactly once and no job exceeds jobSize for a larger irregular set. The existing count-based dataset tests now pass a constant-size callback.

],
[
16,
Expand Down Expand Up @@ -91,11 +95,57 @@ public function testSchedule(
{
$files = array_fill(0, $numberOfFiles, 'file.php');
$scheduler = new Scheduler($jobSize, $maximumNumberOfProcesses, $minimumNumberOfJobsPerProcess);
$schedule = $scheduler->scheduleWork($cpuCores, $files);
$schedule = $scheduler->scheduleWork($cpuCores, $files, static fn (string $file): int => 0);

$this->assertSame($expectedNumberOfProcesses, $schedule->getNumberOfProcesses());
$jobSizes = array_map(static fn (array $job): int => count($job), $schedule->getJobs());
$this->assertSame($expectedJobSizes, $jobSizes);
}

public function testHeaviestFilesAreSpreadAcrossJobs(): void
{
	// Synthetic sizes, listed lightest first so the scheduler has to reorder them.
	$sizeByFile = [
		'a.php' => 100,
		'b.php' => 200,
		'c.php' => 300,
		'd.php' => 400,
		'e.php' => 500,
		'f.php' => 600,
	];
	$sizeOf = static fn (string $file): int => $sizeByFile[$file] ?? 0;

	// Job size 2 with six files yields three jobs.
	$jobs = (new Scheduler(2, 16, 1))
		->scheduleWork(16, array_keys($sizeByFile), $sizeOf)
		->getJobs();

	// The three heaviest files (f, e, d) each open a different job and are
	// paired with one of the three lightest files (c, b, a).
	$expectedJobs = [
		['f.php', 'c.php'],
		['e.php', 'b.php'],
		['d.php', 'a.php'],
	];
	$this->assertSame($expectedJobs, $jobs);
}

public function testEveryFileIsScheduledExactlyOnce(): void
{
	$jobSize = 10;

	// 47 files do not divide evenly by the job size; sizes are spread
	// irregularly over 0..999 so the input order differs from the size order.
	$sizeByFile = [];
	for ($index = 0; $index < 47; $index++) {
		$sizeByFile[sprintf('file-%d.php', $index)] = ($index * 37) % 1000;
	}
	$files = array_keys($sizeByFile);

	$jobs = (new Scheduler($jobSize, 16, 1))
		->scheduleWork(16, $files, static fn (string $file): int => $sizeByFile[$file])
		->getJobs();

	// Flattening the jobs must give back exactly the input files - nothing
	// dropped, nothing duplicated - regardless of ordering.
	$scheduledFiles = array_merge(...$jobs);
	sort($scheduledFiles);
	$expectedFiles = $files;
	sort($expectedFiles);
	$this->assertSame($expectedFiles, $scheduledFiles);

	// No job may grow beyond the configured job size.
	foreach ($jobs as $job) {
		$this->assertLessThanOrEqual($jobSize, count($job));
	}
}

}
Loading