Skip to content

Commit 5472a1a

Browse files
authored
Merge pull request #58 from PickwickSoft/feature/#57/add-csv-loader
Feature/#57/add csv loader
2 parents 7ae0711 + 94b2497 commit 5472a1a

8 files changed

Lines changed: 97 additions & 2 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "streams.py"
3-
version = "0.3.3"
3+
version = "1.0.0"
44
authors = ["Stefan Garlonta <stefan@pickwicksoft.org>"]
55
description = "A stream library for Python inspired by Java Stream API"
66
keywords = ["streams", "parallel", "data"]

pystreamapi/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pystreamapi.__stream import Stream
22
from pystreamapi._streams.error.__levels import ErrorLevel
33

4-
__version__ = "0.3.3"
4+
__version__ = "1.0.0"
55
__all__ = ["Stream", "ErrorLevel"]
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import contextlib
2+
import os
3+
from collections import namedtuple
4+
from csv import reader
5+
6+
7+
def csv(file_path: str, delimiter=',', encoding="utf-8") -> list:
    """
    Load a CSV file and convert it into a list of namedtuples.

    The first non-empty line of the file is the header; its values are used verbatim as
    the field names of the ``Row`` namedtuple. Every following non-empty line becomes one
    ``Row`` whose values are converted to int, float or bool when their text allows it and
    are kept as strings otherwise. Blank lines are skipped.

    :param file_path: The absolute path to the CSV file.
    :param delimiter: The delimiter used in the CSV file.
    :param encoding: The encoding of the CSV file.
    :return: A list of namedtuples, one per data row (empty if the file has no data rows).
    :raises ValueError: If file_path is not absolute or a header value is not a valid
        namedtuple field name.
    :raises FileNotFoundError: If file_path does not exist.
    :raises TypeError: If a data row has a different number of values than the header.
    """
    file_path = __validate_path(file_path)
    # skipcq: PTC-W6004
    with open(file_path, 'r', newline='', encoding=encoding) as csvfile:
        # csv.reader yields an empty list for a blank line. Drop those so a blank line is
        # neither mistaken for the header nor expanded into a Row with missing values.
        rows = (row for row in reader(csvfile, delimiter=delimiter) if row)

        # Create the namedtuple type from the header (no header -> a Row without fields)
        Row = namedtuple('Row', next(rows, []))

        # Process the data, casting values to int, float or bool if possible.
        # The list is built inside the ``with`` block because ``rows`` reads lazily.
        return [Row(*[__try_cast(value) for value in row]) for row in rows]


def __validate_path(file_path: str):
    """
    Check that a path string is absolute and points to something that exists.

    :param file_path: The path to check.
    :return: The unchanged file_path.
    :raises ValueError: If file_path is not an absolute path.
    :raises FileNotFoundError: If nothing exists at file_path.
    """
    if not os.path.isabs(file_path):
        raise ValueError("The file_path must be an absolute path.")

    if not os.path.exists(file_path):
        raise FileNotFoundError("The specified file does not exist.")

    return file_path


def __try_cast(value):
    """
    Try to cast a string to a primary python data type (int, float, bool).

    int is tried before float, so '2' becomes 2 and '2.0' becomes 2.0. 'true'/'false' are
    matched case-insensitively. Any other text is returned unchanged.
    """
    for cast in (int, float):
        # A failed conversion is expected; fall through to the next candidate type
        with contextlib.suppress(ValueError):
            return cast(value)

    lowered = value.lower()
    if lowered in ('true', 'false'):
        return lowered == 'true'
    return value

pystreamapi/loaders/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from pystreamapi.loaders.__csv_loader import csv
2+
3+
__all__ = [
4+
'csv'
5+
]

tests/assets/data.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
attr1,attr2
2+
1,2.0
3+
a,b

tests/assets/data2.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
attr1;attr2
2+
1;2

tests/assets/empty.csv

Whitespace-only changes.

tests/test_loaders.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import os
2+
from unittest import TestCase
3+
from pystreamapi.loaders import csv
4+
5+
class TestLoaders(TestCase):
    """Tests for the csv loader, run against the fixture files in the ``assets`` directory."""

    def setUp(self) -> None:
        """Store the absolute path of the ``assets`` directory located beside this file."""
        test_dir = os.path.dirname(os.path.realpath(__file__))
        self.path = os.path.join(test_dir, 'assets')

    def test_csv_loader(self):
        """Values are cast to int or float where possible and stay strings otherwise."""
        rows = csv(f'{self.path}/data.csv')
        self.assertEqual(len(rows), 2)

        numeric_row = rows[0]
        self.assertEqual(numeric_row.attr1, 1)
        self.assertIsInstance(numeric_row.attr1, int)
        self.assertEqual(numeric_row.attr2, 2.0)
        self.assertIsInstance(numeric_row.attr2, float)

        text_row = rows[1]
        self.assertEqual(text_row.attr1, 'a')
        self.assertIsInstance(text_row.attr1, str)

    def test_csv_loader_with_custom_delimiter(self):
        """A semicolon-separated file is parsed when the delimiter is passed explicitly."""
        rows = csv(f'{self.path}/data2.csv', delimiter=';')
        self.assertEqual(len(rows), 1)

        only_row = rows[0]
        self.assertEqual(only_row.attr1, 1)
        self.assertIsInstance(only_row.attr1, int)

    def test_csv_loader_with_empty_file(self):
        """An empty file produces an empty result."""
        rows = csv(f'{self.path}/empty.csv')
        self.assertEqual(len(rows), 0)

    def test_csv_loader_with_invalid_path(self):
        """An absolute path to a missing file raises FileNotFoundError."""
        with self.assertRaises(FileNotFoundError):
            csv(f'{self.path}/invalid.csv')

    def test_csv_loader_with_non_absolute_path(self):
        """A relative path is rejected with ValueError."""
        with self.assertRaises(ValueError):
            csv('invalid.csv')

0 commit comments

Comments
 (0)