Skip to content

Commit abe91a6

Browse files
committed
Simplify and improve logging for row skipping
1 parent bbd17fb commit abe91a6

4 files changed

Lines changed: 34 additions & 81 deletions

File tree

models/CsvImport/File.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@ public function parse()
9898
$this->_parseErrors[] = $e->getMessage()
9999
. ' ' . __('Please ensure that all column names are unique.');
100100
return false;
101-
} catch (CsvImport_MissingColumnException $e) {
102-
$this->_parseErrors[] = $e->getMessage()
101+
}
102+
if ($this->_columnExamples === false) {
103+
$this->_parseErrors[] = __('Incorrect number of columns in first row.')
103104
. ' ' . __('Please ensure that the CSV file is formatted correctly'
104105
. ' and contains the expected number of columns for each row.');
105106
return false;
@@ -116,4 +117,4 @@ public function getErrorString()
116117
{
117118
return join(' ', $this->_parseErrors);
118119
}
119-
}
120+
}

models/CsvImport/Import.php

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -579,18 +579,24 @@ protected function _importLoop($startAt = null)
579579
if ($startAt) {
580580
$rows->seek($startAt);
581581
}
582-
$rows->skipInvalidRows(true);
583582
$this->_log("Running item import loop. Memory usage: %memory%");
584583
while ($rows->valid()) {
585584
$row = $rows->current();
586585
$index = $rows->key();
587-
$this->skipped_row_count += $rows->getSkippedCount();
588-
if ($item = $this->_addItemFromRow($row)) {
589-
release_object($item);
590-
} else {
586+
587+
if ($row === false) {
588+
$this->skipped_row_count++;
589+
$this->_log("Row {$index}: Incorrect number of columns, skipped.", Zend_Log::WARN);
590+
} elseif ($this->_rowIsBlank($row)) {
591+
$this->skipped_row_count++;
592+
$this->_log("Row {$index}: Blank row, skipped.", Zend_Log::WARN);
593+
} elseif (!($item = $this->_addItemFromRow($row))) {
591594
$this->skipped_item_count++;
592-
$this->_log("Skipped item on row #{$index}.", Zend_Log::WARN);
595+
$this->_log("Row {$index}: Skipped item.", Zend_Log::WARN);
596+
} else {
597+
release_object($item);
593598
}
599+
594600
$this->file_position = $this->getCsvFile()->getIterator()->tell();
595601
if ($this->_batchSize && ($index % $this->_batchSize == 0)) {
596602
$this->_log("Completed importing batch of $this->_batchSize "
@@ -599,7 +605,6 @@ protected function _importLoop($startAt = null)
599605
}
600606
$rows->next();
601607
}
602-
$this->skipped_row_count += $rows->getSkippedCount();
603608
return $this->complete();
604609
} catch (Omeka_Job_Worker_InterruptException $e) {
605610
// Interruptions usually indicate that we should resume from
@@ -671,27 +676,28 @@ protected function _undoImportLoop()
671676
}
672677

673678
/**
674-
* Adds a new item based on a row string in the CSV file and returns it.
679+
* Check if a given row is completely blank (no values).
675680
*
676-
* @param string $row A row string in the CSV file
677-
* @return Item|boolean The inserted item or false if an item could not be added.
681+
* @param array $row A row of CSV values
682+
* @return bool
678683
*/
679-
protected function _addItemFromRow($row)
684+
protected function _rowIsBlank($row)
680685
{
681-
// check if this row is all blank cells
682-
$blank = TRUE;
683686
foreach ($row as $value) {
684687
if (trim($value) !== '') {
685-
$blank = FALSE;
686-
break;
688+
return false;
687689
}
688690
}
689-
// if nothing but blank cells, skip this row
690-
if ($blank) {
691-
$this->_log('Blank row.', Zend_Log::INFO);
692-
return FALSE;
693-
}
694-
691+
return true;
692+
}
693+
/**
694+
* Adds a new item based on a row string in the CSV file and returns it.
695+
*
696+
* @param string $row A row string in the CSV file
697+
* @return Item|boolean The inserted item or false if an item could not be added.
698+
*/
699+
protected function _addItemFromRow($row)
700+
{
695701
$result = $this->getColumnMaps()->map($row);
696702
$tags = $result[CsvImport_ColumnMap::TYPE_TAG];
697703
$itemMetadata = array(

models/CsvImport/MissingColumnException.php

Lines changed: 0 additions & 10 deletions
This file was deleted.

models/CsvImport/RowIterator.php

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ class CsvImport_RowIterator implements SeekableIterator
1919
private $_valid = true;
2020
private $_colNames = array();
2121
private $_colCount = 0;
22-
private $_skipInvalidRows = false;
23-
private $_skippedRowCount = 0;
2422

2523
/**
2624
* @param string $filePath
@@ -103,16 +101,7 @@ public function key()
103101
*/
104102
public function next()
105103
{
106-
try {
107-
$this->_moveNext();
108-
} catch (CsvImport_MissingColumnException $e) {
109-
if ($this->_skipInvalidRows) {
110-
$this->_skippedRowCount++;
111-
$this->next();
112-
} else {
113-
throw $e;
114-
}
115-
}
104+
$this->_moveNext();
116105
}
117106

118107
/**
@@ -149,9 +138,6 @@ protected function _moveNext()
149138
$this->_currentRow = $this->_formatRow($nextRow);
150139
} else {
151140
$this->_currentRow = array();
152-
}
153-
154-
if (!$this->_currentRow) {
155141
fclose($this->_handle);
156142
$this->_valid = false;
157143
$this->_handle = null;
@@ -187,38 +173,11 @@ public function getColumnNames()
187173
return $this->_colNames;
188174
}
189175

190-
/**
191-
* Returns the number of rows that were skipped since the last time the
192-
* function was called.
193-
*
194-
* Skipped count is reset to 0 after each call to getSkippedCount(). This
195-
* makes it easier to aggregate the number over multiple job runs.
196-
*
197-
* @return int The number of rows skipped since last time function was called
198-
*/
199-
public function getSkippedCount()
200-
{
201-
$skipped = $this->_skippedRowCount;
202-
$this->_skippedRowCount = 0;
203-
return $skipped;
204-
}
205-
206-
/**
207-
* Sets whether to skip invalid rows.
208-
*
209-
* @param boolean $flag
210-
*/
211-
public function skipInvalidRows($flag)
212-
{
213-
$this->_skipInvalidRows = (boolean)$flag;
214-
}
215-
216176
/**
217177
* Formats a row.
218178
*
219179
* @throws LogicException
220-
* @throws CsvImport_MissingColumnException
221-
* @return array The formatted row
180+
* @return array|false The formatted row, false if malformed (wrong number of columns)
222181
*/
223182
protected function _formatRow($row)
224183
{
@@ -228,10 +187,7 @@ protected function _formatRow($row)
228187
. "names have been set.");
229188
}
230189
if (count($row) != $this->_colCount) {
231-
$printable = substr(join($this->_columnDelimiter, $row), 0, 30) . '...';
232-
throw new CsvImport_MissingColumnException("Row beginning with "
233-
. "'$printable' does not have the required {$this->_colCount} "
234-
. "rows.");
190+
return false;
235191
}
236192
for ($i = 0; $i < $this->_colCount; $i++)
237193
{

0 commit comments

Comments
 (0)