Skip to content

Commit 85e2d8d

Browse files
committed
Unify read and parse into a single function
1 parent 52dbc5d commit 85e2d8d

5 files changed

Lines changed: 50 additions & 138 deletions

File tree

harp/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
from harp.io import REFERENCE_EPOCH, MessageType, parse, read
1+
from harp.io import REFERENCE_EPOCH, MessageType, read
22
from harp.reader import create_reader
33
from harp.schema import read_schema
44

5-
__all__ = ["REFERENCE_EPOCH", "MessageType", "parse", "read", "create_reader", "read_schema"]
5+
__all__ = ["REFERENCE_EPOCH", "MessageType", "read", "create_reader", "read_schema"]

harp/io.py

Lines changed: 14 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
import pandas as pd
99
from pandas._typing import Axes
1010

11-
from harp.typing import BufferLike
11+
from harp.typing import _BufferLike, _FileLike
1212

1313
REFERENCE_EPOCH = datetime(1904, 1, 1)
1414
"""The reference epoch for UTC harp time."""
@@ -41,71 +41,30 @@ class MessageType(IntEnum):
4141

4242

4343
def read(
44-
file: Union[str, bytes, PathLike[Any], BinaryIO],
45-
address: Optional[int] = None,
46-
dtype: Optional[np.dtype] = None,
47-
length: Optional[int] = None,
48-
columns: Optional[Axes] = None,
49-
epoch: Optional[datetime] = None,
50-
keep_type: bool = False,
51-
):
52-
"""Read single-register Harp data from the specified file.
53-
54-
Parameters
55-
----------
56-
file
57-
Open file object or filename containing binary data from
58-
a single device register.
59-
address
60-
Expected register address. If specified, the address of
61-
the first message in the file is used for validation.
62-
dtype
63-
Expected data type of the register payload. If specified, the
64-
payload type of the first message in the file is used for validation.
65-
length
66-
Expected number of elements in register payload. If specified, the
67-
payload length of the first message in the file is used for validation.
68-
columns
69-
The optional column labels to use for the data values.
70-
epoch
71-
Reference datetime at which time zero begins. If specified,
72-
the result data frame will have a datetime index.
73-
keep_type
74-
Specifies whether to include a column with the message type.
75-
76-
Returns
77-
-------
78-
A pandas data frame containing message data, sorted by time.
79-
"""
80-
data = np.fromfile(file, dtype=np.uint8)
81-
return _fromraw(data, address, dtype, length, columns, epoch, keep_type)
82-
83-
84-
def parse(
85-
buffer: BufferLike,
44+
file_or_buf: Union[_FileLike, _BufferLike],
8645
address: Optional[int] = None,
8746
dtype: Optional[np.dtype] = None,
8847
length: Optional[int] = None,
8948
columns: Optional[Axes] = None,
9049
epoch: Optional[datetime] = None,
9150
keep_type: bool = False,
9251
):
93-
"""Parse single-register Harp data from the specified buffer.
52+
"""Read single-register Harp data from the specified file or buffer.
9453
9554
Parameters
9655
----------
97-
buffer
98-
An object that exposes a buffer interface containing binary data from
56+
file_or_buf
57+
File path, open file object, or buffer containing binary data from
9958
a single device register.
10059
address
10160
Expected register address. If specified, the address of
102-
the first message in the buffer is used for validation.
61+
the first message is used for validation.
10362
dtype
10463
Expected data type of the register payload. If specified, the
105-
payload type of the first message in the buffer is used for validation.
64+
payload type of the first message is used for validation.
10665
length
10766
Expected number of elements in register payload. If specified, the
108-
payload length of the first message in the buffer is used for validation.
67+
payload length of the first message is used for validation.
10968
columns
11069
The optional column labels to use for the data values.
11170
epoch
@@ -118,19 +77,13 @@ def parse(
11877
-------
11978
A pandas data frame containing message data, sorted by time.
12079
"""
121-
data = np.frombuffer(buffer, dtype=np.uint8)
122-
return _fromraw(data, address, dtype, length, columns, epoch, keep_type)
123-
80+
if isinstance(file_or_buf, (str, PathLike, BinaryIO)) or hasattr(file_or_buf, "readinto"):
81+
# TODO: in the below we ignore the type as otherwise
82+
# we have no way to runtime check _IOProtocol
83+
data = np.fromfile(file_or_buf, dtype=np.uint8) # type: ignore
84+
else:
85+
data = np.frombuffer(file_or_buf, dtype=np.uint8)
12486

125-
def _fromraw(
126-
data: npt.NDArray[np.uint8],
127-
address: Optional[int] = None,
128-
dtype: Optional[np.dtype] = None,
129-
length: Optional[int] = None,
130-
columns: Optional[Axes] = None,
131-
epoch: Optional[datetime] = None,
132-
keep_type: bool = False,
133-
):
13487
if len(data) == 0:
13588
return pd.DataFrame(
13689
columns=columns,

harp/reader.py

Lines changed: 13 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -6,38 +6,29 @@
66
from math import log2
77
from os import PathLike
88
from pathlib import Path
9-
from typing import Any, BinaryIO, Callable, Iterable, Mapping, Optional, Protocol, Union
9+
from typing import Callable, Iterable, Mapping, Optional, Protocol, Union
1010

1111
from numpy import dtype
1212
from pandas import DataFrame, Series
1313
from pandas._typing import Axes
1414

15-
from harp.io import MessageType, parse, read
15+
from harp.io import MessageType, read
1616
from harp.model import BitMask, GroupMask, Model, PayloadMember, Register
1717
from harp.schema import read_schema
18-
from harp.typing import BufferLike
18+
from harp.typing import _BufferLike, _FileLike
1919

2020

2121
@dataclass
2222
class _ReaderParams:
23-
path: Path
23+
base_path: Path
2424
epoch: Optional[datetime] = None
2525
keep_type: bool = False
2626

2727

2828
class _ReadRegister(Protocol):
2929
def __call__(
3030
self,
31-
file: Optional[Union[str, bytes, PathLike[Any], BinaryIO]] = None,
32-
epoch: Optional[datetime] = None,
33-
keep_type: bool = False,
34-
) -> DataFrame: ...
35-
36-
37-
class _ParseRegister(Protocol):
38-
def __call__(
39-
self,
40-
buffer: BufferLike,
31+
file_or_buf: Optional[Union[_FileLike, _BufferLike]] = None,
4132
epoch: Optional[datetime] = None,
4233
keep_type: bool = False,
4334
) -> DataFrame: ...
@@ -46,17 +37,14 @@ def __call__(
4637
class RegisterReader:
4738
register: Register
4839
read: _ReadRegister
49-
parse: _ParseRegister
5040

5141
def __init__(
5242
self,
5343
register: Register,
5444
read: _ReadRegister,
55-
parse: _ParseRegister,
5645
) -> None:
5746
self.register = register
5847
self.read = read
59-
self.parse = parse
6048

6149

6250
class RegisterMap(UserDict[str, RegisterReader]):
@@ -180,16 +168,16 @@ def parser(df: DataFrame):
180168

181169
def _create_register_reader(register: Register, params: _ReaderParams):
182170
def reader(
183-
file: Optional[Union[str, bytes, PathLike[Any], BinaryIO]] = None,
171+
file_or_buf: Optional[Union[_FileLike, _BufferLike]] = None,
184172
columns: Optional[Axes] = None,
185173
epoch: Optional[datetime] = params.epoch,
186174
keep_type: bool = params.keep_type,
187175
):
188-
if file is None:
189-
file = f"{params.path}_{register.address}.bin"
176+
if file_or_buf is None:
177+
file_or_buf = f"{params.base_path}_{register.address}.bin"
190178

191179
data = read(
192-
file,
180+
file_or_buf,
193181
address=register.address,
194182
dtype=dtype(register.type),
195183
length=register.length,
@@ -202,46 +190,23 @@ def reader(
202190
return reader
203191

204192

205-
def _create_register_parser(register: Register, params: _ReaderParams):
206-
def parser(
207-
buffer: BufferLike,
208-
columns: Optional[Axes] = None,
209-
epoch: Optional[datetime] = params.epoch,
210-
keep_type: bool = params.keep_type,
211-
):
212-
return parse(
213-
buffer,
214-
address=register.address,
215-
dtype=dtype(register.type),
216-
length=register.length,
217-
columns=columns,
218-
epoch=epoch,
219-
keep_type=keep_type,
220-
)
221-
222-
return parser
223-
224-
225193
def _create_register_handler(device: Model, name: str, params: _ReaderParams):
226194
register = device.registers[name]
227195
reader = _create_register_reader(register, params)
228-
parser = _create_register_parser(register, params)
229196

230197
if register.maskType is not None:
231198
key = register.maskType.root
232199
bitMask = None if device.bitMasks is None else device.bitMasks.get(key)
233200
if bitMask is not None:
234201
bitmask_parser = _create_bitmask_parser(bitMask)
235202
reader = _compose_parser(bitmask_parser, reader, params)
236-
parser = _compose_parser(bitmask_parser, parser, params)
237-
return RegisterReader(register, reader, parser)
203+
return RegisterReader(register, reader)
238204

239205
groupMask = None if device.groupMasks is None else device.groupMasks.get(key)
240206
if groupMask is not None:
241207
groupmask_parser = _create_groupmask_parser(name, groupMask)
242208
reader = _compose_parser(groupmask_parser, reader, params)
243-
parser = _compose_parser(groupmask_parser, parser, params)
244-
return RegisterReader(register, reader, parser)
209+
return RegisterReader(register, reader)
245210

246211
if register.payloadSpec is not None:
247212
member_parsers = [
@@ -253,17 +218,15 @@ def payload_parser(df: DataFrame):
253218
return DataFrame({n: f(df) for n, f in member_parsers}, index=df.index)
254219

255220
reader = _compose_parser(payload_parser, reader, params)
256-
parser = _compose_parser(payload_parser, parser, params)
257-
return RegisterReader(register, reader, parser)
221+
return RegisterReader(register, reader)
258222

259223
columns = (
260224
[name]
261225
if register.length is None or register.length == 1
262226
else [f"{name}_{i}" for i in range(register.length)]
263227
)
264228
reader = partial(reader, columns=columns)
265-
parser = partial(parser, columns=columns)
266-
return RegisterReader(register, reader, parser)
229+
return RegisterReader(register, reader)
267230

268231

269232
def create_reader(

harp/typing.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
11
import mmap
22
import sys
3-
from typing import Any, Union
3+
from os import PathLike
4+
from typing import Any, BinaryIO, Union
45

56
from numpy.typing import NDArray
67

78
if sys.version_info >= (3, 12):
8-
from collections.abc import Buffer as BufferLike
9+
from collections.abc import Buffer as _BufferLike
910
else:
10-
BufferLike = Union[bytes, bytearray, memoryview, mmap.mmap, NDArray[Any]]
11+
_BufferLike = Union[bytes, bytearray, memoryview, mmap.mmap, NDArray[Any]]
12+
13+
_FileLike = Union[str, PathLike[str], BinaryIO]

tests/test_io.py

Lines changed: 15 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import pytest
66
from pytest import mark
77

8-
from harp.io import REFERENCE_EPOCH, MessageType, format, parse, read
8+
from harp.io import REFERENCE_EPOCH, MessageType, format, read
99
from tests.params import DataFileParam
1010

1111
testdata = [
@@ -41,27 +41,20 @@
4141
def test_read(dataFile: DataFileParam):
4242
context = pytest.raises if dataFile.expected_error else nullcontext
4343
with context(dataFile.expected_error): # type: ignore
44-
path = dataFile.path
44+
file_or_buf = dataFile.path
4545
if dataFile.repeat_data:
46-
with open(path, "rb") as f:
47-
buffer = f.read() * dataFile.repeat_data
48-
data = parse(
49-
buffer,
50-
address=dataFile.expected_address,
51-
dtype=dataFile.expected_dtype,
52-
length=dataFile.expected_length,
53-
epoch=dataFile.epoch,
54-
keep_type=dataFile.keep_type,
55-
)
56-
else:
57-
data = read(
58-
path,
59-
address=dataFile.expected_address,
60-
dtype=dataFile.expected_dtype,
61-
length=dataFile.expected_length,
62-
epoch=dataFile.epoch,
63-
keep_type=dataFile.keep_type,
64-
)
46+
with open(file_or_buf, "rb") as f:
47+
file_or_buf = f.read() * dataFile.repeat_data
48+
49+
data = read(
50+
file_or_buf,
51+
address=dataFile.expected_address,
52+
dtype=dataFile.expected_dtype,
53+
length=dataFile.expected_length,
54+
epoch=dataFile.epoch,
55+
keep_type=dataFile.keep_type,
56+
)
57+
6558
assert len(data) == dataFile.expected_rows
6659
assert isinstance(data.index, pd.DatetimeIndex if dataFile.epoch else pd.Index)
6760
if dataFile.keep_type:
@@ -83,7 +76,7 @@ def test_write(dataFile: DataFileParam):
8376
raise AssertionError("expected address must be defined for all write tests")
8477

8578
buffer = np.fromfile(dataFile.path, np.uint8)
86-
data = parse(
79+
data = read(
8780
buffer,
8881
address=dataFile.expected_address,
8982
dtype=dataFile.expected_dtype,

0 commit comments

Comments
 (0)