diff --git a/src/Command/AnalyserRunner.php b/src/Command/AnalyserRunner.php
index 51149328b96..fba6e420608 100644
--- a/src/Command/AnalyserRunner.php
+++ b/src/Command/AnalyserRunner.php
@@ -16,6 +16,7 @@
 use function array_unshift;
 use function array_values;
 use function count;
+use function filesize;
 use function function_exists;
 use function is_file;
 use function memory_get_peak_usage;
@@ -73,6 +74,6 @@ public function runAnalyser(
 		}
 
 		if (!$debug && $allowParallel && function_exists('proc_open')) {
-			$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files);
+			$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files, static fn (string $file): int => (int) @filesize($file));
 			$mainScript = null;
 			if (isset($_SERVER['argv'][0]) && is_file($_SERVER['argv'][0])) {
diff --git a/src/Command/FixerWorkerRunner.php b/src/Command/FixerWorkerRunner.php
index 9f70596ae82..9a46b1501b2 100644
--- a/src/Command/FixerWorkerRunner.php
+++ b/src/Command/FixerWorkerRunner.php
@@ -26,6 +26,7 @@
 use function array_key_exists;
 use function count;
 use function filemtime;
+use function filesize;
 use function in_array;
 use function is_file;
 use function memory_get_peak_usage;
@@ -351,7 +352,7 @@ private function runAnalyser(LoopInterface $loop, array $files, array $allAnalys
 			));
 		}
 
-		$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files);
+		$schedule = $this->scheduler->scheduleWork($this->cpuCoreCounter->getNumberOfCpuCores(), $files, static fn (string $file): int => (int) @filesize($file));
 		$mainScript = null;
 		if (isset($_SERVER['argv'][0]) && is_file($_SERVER['argv'][0])) {
 			$mainScript = $_SERVER['argv'][0];
diff --git a/src/Parallel/Scheduler.php b/src/Parallel/Scheduler.php
index 879cdcc5f5f..9cc685025fc 100644
--- a/src/Parallel/Scheduler.php
+++ b/src/Parallel/Scheduler.php
@@ -6,12 +6,14 @@
 use PHPStan\DependencyInjection\AutowiredParameter;
 use PHPStan\DependencyInjection\AutowiredService;
 use PHPStan\Diagnose\DiagnoseExtension;
-use function array_chunk;
+use function array_values;
+use function ceil;
 use function count;
 use function floor;
 use function max;
 use function min;
 use function sprintf;
+use function usort;
 
 #[AutowiredService]
 final class Scheduler implements DiagnoseExtension
@@ -38,13 +40,34 @@ public function __construct(
 
 	/**
 	 * @param array $files
+	 * @param (callable(string): int)|null $fileSizeCallback relative cost of a file (e.g. its size in bytes); null weighs every file equally
 	 */
 	public function scheduleWork(
 		int $cpuCores,
 		array $files,
+		?callable $fileSizeCallback = null,
 	): Schedule
 	{
-		$jobs = array_chunk($files, $this->jobSize);
+		// sort by size and deal files round-robin across jobs so every job mixes
+		// large and small files - chunking a sorted list would concentrate the
+		// heaviest files into a single job and create one long-running straggler
+		$fileSizes = [];
+		foreach ($files as $file) {
+			$fileSizes[$file] = $fileSizeCallback !== null ? $fileSizeCallback($file) : 0;
+		}
+
+		// break ties on the path: usort() is only stable since PHP 8.0, and
+		// equal sizes must not make the job layout depend on the sort algorithm
+		usort($files, static fn (string $a, string $b): int => ($fileSizes[$b] <=> $fileSizes[$a]) ?: ($a <=> $b));
+
+		$numberOfJobs = (int) ceil(count($files) / $this->jobSize);
+		$stripedJobs = [];
+		foreach ($files as $i => $file) {
+			// consecutive entries of the sorted list land in different jobs
+			$stripedJobs[$i % $numberOfJobs][] = $file;
+		}
+
+		$jobs = array_values($stripedJobs);
 		$numberOfProcesses = min(
 			max((int) floor(count($jobs) / $this->minimumNumberOfJobsPerProcess), 1),
 			$cpuCores,
diff --git a/tests/PHPStan/Parallel/SchedulerTest.php b/tests/PHPStan/Parallel/SchedulerTest.php
index 284af8d55cb..dc13f696012 100644
--- a/tests/PHPStan/Parallel/SchedulerTest.php
+++ b/tests/PHPStan/Parallel/SchedulerTest.php
@@ -5,8 +5,12 @@
 use PHPUnit\Framework\Attributes\DataProvider;
 use PHPUnit\Framework\TestCase;
 use function array_fill;
+use function array_keys;
 use function array_map;
+use function array_merge;
 use function count;
+use function sort;
+use function sprintf;
 
 class SchedulerTest extends TestCase
 {
@@ -21,7 +25,7 @@ public static function dataSchedule(): array
 				50,
 				115,
 				1,
-				[50, 50, 15],
+				[39, 38, 38],
 			],
 			[
 				16,
@@ -30,7 +34,7 @@ public static function dataSchedule(): array
 				30,
 				124,
 				5,
-				[30, 30, 30, 30, 4],
+				[25, 25, 25, 25, 24],
 			],
 			[
 				16,
@@ -39,7 +43,7 @@ public static function dataSchedule(): array
 				30,
 				124,
 				3,
-				[30, 30, 30, 30, 4],
+				[25, 25, 25, 25, 24],
 			],
 			[
 				16,
@@ -48,7 +52,7 @@ public static function dataSchedule(): array
 				10,
 				298,
 				16,
-				[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8],
+				[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9],
 			],
 			[
 				16,
@@ -57,7 +61,7 @@ public static function dataSchedule(): array
 				30,
 				124,
 				2,
-				[30, 30, 30, 30, 4],
+				[25, 25, 25, 25, 24],
 			],
 			[
 				16,
@@ -91,11 +95,68 @@ public function testSchedule(
 	{
 		$files = array_fill(0, $numberOfFiles, 'file.php');
 		$scheduler = new Scheduler($jobSize, $maximumNumberOfProcesses, $minimumNumberOfJobsPerProcess);
-		$schedule = $scheduler->scheduleWork($cpuCores, $files);
+		$schedule = $scheduler->scheduleWork($cpuCores, $files, static fn (string $file): int => 0);
 
 		$this->assertSame($expectedNumberOfProcesses, $schedule->getNumberOfProcesses());
 		$jobSizes = array_map(static fn (array $job): int => count($job), $schedule->getJobs());
 		$this->assertSame($expectedJobSizes, $jobSizes);
 	}
 
+	public function testHeaviestFilesAreSpreadAcrossJobs(): void
+	{
+		$fileSizes = [
+			'a.php' => 100,
+			'b.php' => 200,
+			'c.php' => 300,
+			'd.php' => 400,
+			'e.php' => 500,
+			'f.php' => 600,
+		];
+
+		$scheduler = new Scheduler(2, 16, 1);
+		$schedule = $scheduler->scheduleWork(16, array_keys($fileSizes), static fn (string $file): int => $fileSizes[$file] ?? 0);
+
+		// six files, job size 2 -> three jobs; the three heaviest files must not
+		// share a job, and every job pairs one heavy file with one light file
+		$this->assertSame([
+			['f.php', 'c.php'],
+			['e.php', 'b.php'],
+			['d.php', 'a.php'],
+		], $schedule->getJobs());
+	}
+
+	public function testEveryFileIsScheduledExactlyOnce(): void
+	{
+		$files = [];
+		$sizes = [];
+		for ($i = 0; $i < 47; $i++) {
+			$file = sprintf('file-%d.php', $i);
+			$files[] = $file;
+			$sizes[$file] = ($i * 37) % 1000;
+		}
+
+		$scheduler = new Scheduler(10, 16, 1);
+		$schedule = $scheduler->scheduleWork(16, $files, static fn (string $file): int => $sizes[$file]);
+
+		$scheduled = array_merge(...$schedule->getJobs());
+		sort($scheduled);
+		sort($files);
+		$this->assertSame($files, $scheduled);
+
+		foreach ($schedule->getJobs() as $job) {
+			$this->assertLessThanOrEqual(10, count($job));
+		}
+	}
+
+	public function testFilesOfEqualSizeAreOrderedByPath(): void
+	{
+		$scheduler = new Scheduler(2, 16, 1);
+		$schedule = $scheduler->scheduleWork(16, ['d.php', 'b.php', 'c.php', 'a.php']);
+
+		$this->assertSame([
+			['a.php', 'c.php'],
+			['b.php', 'd.php'],
+		], $schedule->getJobs());
+	}
+
 }