Skip to content

Commit 775b687

Browse files
authored
Merge pull request #76 from PickwickSoft/bugfix/#63/fix-csv-loader-very-slow
⚡ Fix CSV loader very slow
2 parents dd00c9e + 4736671 commit 775b687

2 files changed

Lines changed: 18 additions & 5 deletions

File tree

pystreamapi/loaders/__csv_loader.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,22 @@
66
from pystreamapi.loaders.__lazy_file_iterable import LazyFileIterable
77

88

9-
def csv(file_path: str, delimiter=',', encoding="utf-8") -> LazyFileIterable:
9+
def csv(file_path: str, cast_types=True, delimiter=',', encoding="utf-8") -> LazyFileIterable:
1010
"""
1111
Loads a CSV file and converts it into a list of namedtuples.
1212
1313
Returns:
1414
list: A list of namedtuples, where each namedtuple represents a row in the CSV.
15+
:param cast_types: Set to False to disable casting of values to int, bool or float.
1516
:param encoding: The encoding of the CSV file.
1617
:param file_path: The path to the CSV file.
1718
:param delimiter: The delimiter used in the CSV file.
1819
"""
1920
file_path = __validate_path(file_path)
20-
return LazyFileIterable(lambda: __load_csv(file_path, delimiter, encoding))
21+
return LazyFileIterable(lambda: __load_csv(file_path, cast_types, delimiter, encoding))
2122

2223

23-
def __load_csv(file_path, delimiter, encoding):
24+
def __load_csv(file_path, cast, delimiter, encoding):
2425
"""Load a CSV file and convert it into a list of namedtuples"""
2526
# skipcq: PTC-W6004
2627
with open(file_path, mode='r', newline='', encoding=encoding) as csvfile:
@@ -29,8 +30,10 @@ def __load_csv(file_path, delimiter, encoding):
2930
# Create a namedtuple type whose field names are taken from the header row
3031
Row = namedtuple('Row', list(next(csvreader, [])))
3132

33+
mapper = __try_cast if cast else lambda x: x
34+
3235
# Process the data, casting values to int or float if possible (skipped when casting is disabled)
33-
data = [Row(*[__try_cast(value) for value in row]) for row in csvreader]
36+
data = [Row(*[mapper(value) for value in row]) for row in csvreader]
3437
return data
3538

3639

tests/test_loaders.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ def test_csv_loader(self):
2020
self.assertEqual(data[1].attr1, 'a')
2121
self.assertIsInstance(data[1].attr1, str)
2222

23+
def test_csv_loader_with_casting_disabled(self):
24+
data = csv(f'{self.path}/data.csv', cast_types=False)
25+
self.assertEqual(len(data), 2)
26+
self.assertEqual(data[0].attr1, '1')
27+
self.assertIsInstance(data[0].attr1, str)
28+
self.assertEqual(data[0].attr2, '2.0')
29+
self.assertIsInstance(data[0].attr2, str)
30+
self.assertEqual(data[1].attr1, 'a')
31+
self.assertIsInstance(data[1].attr1, str)
32+
2333
def test_csv_loader_is_iterable(self):
2434
data = csv(f'{self.path}/data.csv')
2535
self.assertEqual(len(list(iter(data))), 2)
@@ -38,6 +48,6 @@ def test_csv_loader_with_invalid_path(self):
3848
with self.assertRaises(FileNotFoundError):
3949
csv(f'{self.path}/invalid.csv')
4050

41-
def test_csv_loader_with_non_file(self):
51+
def test_csv_loader_with_no_file(self):
4252
with self.assertRaises(ValueError):
4353
csv(f'{self.path}/')

0 commit comments

Comments
 (0)