Skip to content

Commit 14599da

Browse files
committed
Skip already-checked URLs when output file exists
When using --file-output and the target CSV already contains results, the URLs already listed in that file are now skipped and the new results are appended instead of overwriting the file. This allows resuming interrupted runs.
1 parent 25a9fe8 commit 14599da

1 file changed

Lines changed: 45 additions & 6 deletions

File tree

src/Elgentos/HttpStatuscodeChecker/Console/CheckCommand.php

Lines changed: 45 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ class CheckCommand extends Command
2828
/_/
2929
by elgentos';
3030

31-
public const VERSION = '1.1.1';
31+
public const VERSION = '1.2.1';
3232
protected InputInterface $input;
3333
protected OutputInterface $output;
3434
protected string $name = 'check';
@@ -90,6 +90,18 @@ protected function execute(InputInterface $input, OutputInterface $output): int
9090

9191
$urls = array_filter($urls, [$this, 'validateUrl']);
9292

93+
// If file output is requested and the file already exists, skip URLs that have already been checked
94+
$outputFile = $this->input->getOption('file-output');
95+
if ($outputFile && file_exists($outputFile)) {
96+
$existingUrls = $this->getUrlsFromOutputFile($outputFile);
97+
if (!empty($existingUrls)) {
98+
$beforeCount = count($urls);
99+
$urls = array_values(array_diff($urls, $existingUrls));
100+
$skipped = $beforeCount - count($urls);
101+
$this->output->writeln(sprintf('<info>Skipping %d URLs already present in %s</info>', $skipped, $outputFile));
102+
}
103+
}
104+
93105
$this->output->writeln(sprintf('<info>Processing %s URLs...</info>', count($urls)));
94106

95107
$section = $output->section();
@@ -98,8 +110,8 @@ protected function execute(InputInterface $input, OutputInterface $output): int
98110
$this->table->render();
99111

100112
// Initialize CSV writer if file output is requested
101-
if ($this->input->getOption('file-output')) {
102-
$this->initializeCsvWriter($this->input->getOption('file-output'));
113+
if ($outputFile) {
114+
$this->initializeCsvWriter($outputFile, file_exists($outputFile));
103115
}
104116

105117
$this->checkForStatusCodes($urls);
@@ -324,12 +336,39 @@ private function getDelay(): int
324336
/**
325337
* Initialize CSV writer and write headers
326338
* @param string $outputFile
339+
* @param bool $append
327340
* @throws CannotInsertRecord
328341
*/
329-
private function initializeCsvWriter(string $outputFile): void
342+
private function initializeCsvWriter(string $outputFile, bool $append = false): void
330343
{
331-
$this->csvWriter = Writer::createFromPath($outputFile, 'w');
332-
$this->csvWriter->insertOne(['url', 'status_code']);
344+
if ($append) {
345+
$this->csvWriter = Writer::createFromPath($outputFile, 'a');
346+
} else {
347+
$this->csvWriter = Writer::createFromPath($outputFile, 'w');
348+
$this->csvWriter->insertOne(['url', 'status_code']);
349+
}
350+
}
351+
352+
/**
353+
* Read URLs already present in the output CSV file
354+
* @param string $outputFile
355+
* @return array
356+
*/
357+
private function getUrlsFromOutputFile(string $outputFile): array
358+
{
359+
try {
360+
$csv = Reader::createFromPath($outputFile);
361+
$csv->setHeaderOffset(0);
362+
$urls = [];
363+
foreach ($csv as $record) {
364+
if (isset($record['url'])) {
365+
$urls[] = $record['url'];
366+
}
367+
}
368+
return $urls;
369+
} catch (\Exception $e) {
370+
return [];
371+
}
333372
}
334373

335374
private function getUserAgent()

0 commit comments

Comments
 (0)