Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Unreleased
----------

- feat: :doc:`/scripts/csvclean` adds a :code:`--remove-empty-columns` option, which omits from standard output any columns that are empty in all data rows.

2.2.0 - December 15, 2025
-------------------------

Expand Down
21 changes: 13 additions & 8 deletions csvkit/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
# Other
zero_based=False,
omit_error_rows=False,
report_empty_columns=True,
):
self.reader = reader
# Checks
Expand All @@ -62,6 +63,7 @@ def __init__(
# Other
self.zero_based = zero_based
self.omit_error_rows = omit_error_rows
self.report_empty_columns = report_empty_columns

try:
self.column_names = next(reader)
Expand All @@ -71,6 +73,7 @@ def __init__(
self.column_names = []

self.errors = []
self.empty_column_indices = []

def checked_rows(self):
"""
Expand Down Expand Up @@ -147,12 +150,14 @@ def checked_rows(self):

if row_count: # Don't report all columns as empty if there are no data rows.
if empty_columns := [i for i, count in enumerate(empty_counts) if count == row_count]:
offset = 0 if self.zero_based else 1
self.errors.append(
Error(
1,
["" for _ in range(len_column_names)],
f"Empty columns named {', '.join(repr(self.column_names[i]) for i in empty_columns)}! "
f"Try: csvcut -C {','.join(str(i + offset) for i in empty_columns)}",
self.empty_column_indices = empty_columns
if self.report_empty_columns:
offset = 0 if self.zero_based else 1
self.errors.append(
Error(
1,
["" for _ in range(len_column_names)],
f"Empty columns named {', '.join(repr(self.column_names[i]) for i in empty_columns)}! "
f"Try: csvcut -C {','.join(str(i + offset) for i in empty_columns)}",
)
)
)
20 changes: 16 additions & 4 deletions csvkit/utilities/csvclean.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def add_arguments(self):
self.argparser.add_argument(
'--fillvalue', dest='fillvalue',
help='The value with which to fill short rows. Defaults to none.')
self.argparser.add_argument(
'--remove-empty-columns', dest='remove_empty_columns', action='store_true',
help='Remove columns that are empty in all data rows from standard output.')

def main(self):
if self.additional_input_expected():
Expand All @@ -59,6 +62,7 @@ def main(self):
and not self.args.header_normalize_space
and not self.args.join_short_rows
and not self.args.fill_short_rows
and not self.args.remove_empty_columns
):
self.argparser.error('No checks or fixes were enabled. See available options with: csvclean --help')

Expand All @@ -73,7 +77,7 @@ def main(self):
reader,
# Checks
length_mismatch=default or self.args.length_mismatch,
empty_columns=default or self.args.empty_columns,
empty_columns=default or self.args.empty_columns or self.args.remove_empty_columns,
# Fixes
header_normalize_space=self.args.header_normalize_space,
join_short_rows=self.args.join_short_rows,
Expand All @@ -83,6 +87,7 @@ def main(self):
# Other
zero_based=self.args.zero_based,
omit_error_rows=self.args.omit_error_rows,
report_empty_columns=default or self.args.empty_columns,
)

label = self.args.label
Expand All @@ -93,9 +98,16 @@ def main(self):
label = self.input_file.name

output_writer = agate.csv.writer(self.output_file, **self.writer_kwargs)
output_writer.writerow(checker.column_names)
for row in checker.checked_rows():
output_writer.writerow(row)
if self.args.remove_empty_columns:
rows = list(checker.checked_rows())
keep = [i for i, name in enumerate(checker.column_names) if i not in checker.empty_column_indices]
output_writer.writerow([checker.column_names[i] for i in keep])
for row in rows:
output_writer.writerow([row[i] if i < len(row) else '' for i in keep])
else:
output_writer.writerow(checker.column_names)
for row in checker.checked_rows():
output_writer.writerow(row)

if checker.errors:
error_writer = agate.csv.writer(self.error_file, **self.writer_kwargs)
Expand Down
5 changes: 5 additions & 0 deletions docs/scripts/csvclean.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ Fixes
1,Alice,US
2,Bob,CA

- If a CSV has columns that are empty in every data row, use :code:`--remove-empty-columns` to drop them from standard output.

.. seealso::

:code:`--header-normalize-space` under :ref:`csvclean-usage`.
Expand Down Expand Up @@ -140,6 +142,9 @@ Usage
--fillvalue FILLVALUE
The value with which to fill short rows. Defaults to
none.
--remove-empty-columns
Remove columns that are empty in all data rows from
standard output.


See also: :doc:`../common_arguments`.
Expand Down
14 changes: 14 additions & 0 deletions tests/test_utilities/test_csvclean.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,20 @@ def test_empty_columns_zero(self):
['1', "Empty columns named 'b', '', ''! Try: csvcut -C 1,3,4", '', '', '', '', ''],
])

def test_remove_empty_columns(self):
    # With --remove-empty-columns, only the 'a' and 'c' columns of the
    # fixture are expected on standard output; the header row comes first.
    cli_args = ['--remove-empty-columns', 'examples/test_empty_columns.csv']
    expected_rows = [
        ['a', 'c'],
        ['a', ''],
        ['', 'c'],
        ['', ''],
    ]
    self.assertCleaned(cli_args, expected_rows)

def test_remove_empty_columns_no_empty(self):
    # The expected output keeps all three columns: nothing is removed when
    # no column is empty in every data row.
    cli_args = ['--remove-empty-columns', 'examples/dummy.csv']
    header_row = ['a', 'b', 'c']
    data_row = ['1', '2', '3']
    self.assertCleaned(cli_args, [header_row, data_row])

def test_enable_all_checks(self):
self.assertCleaned(['-a', 'examples/test_empty_columns.csv'], [
['a', 'b', 'c', '', ''],
Expand Down