Skip to content

Commit 1628082

Browse files
committed
convert to mmap, performance is worse
1 parent 678d331 commit 1628082

3 files changed

Lines changed: 127 additions & 48 deletions

File tree

src/msgpack_stream/_io.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ def pack(obj):
1313

1414
def unpack(data):
1515
"""Unpack data into object."""
16-
with io.BytesIO(data) as stream:
17-
obj = unpack_stream(stream)
18-
excess_data = stream.read()
16+
with io.BytesIO(data) as stream, stream.getbuffer() as mv:
17+
obj = unpack_stream(mv)
18+
excess_data = bytes(mv[unpack_stream.__offset__:])
1919
return obj, excess_data

src/msgpack_stream/_msgpack.py

Lines changed: 118 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
f64_b_t,
1212
)
1313

14-
# deference for performance
14+
# dereference for performance
1515
u8_b_pack = u8_b_t.pack
1616
u16_b_pack = u16_b_t.pack
1717
u32_b_pack = u32_b_t.pack
@@ -34,6 +34,17 @@
3434
f32_b_unpack = f32_b_t.unpack
3535
f64_b_unpack = f64_b_t.unpack
3636

37+
u8_s = u8_b_t.struct
38+
u16_s = u16_b_t.struct
39+
u32_s = u32_b_t.struct
40+
u64_s = u64_b_t.struct
41+
s8_s = s8_b_t.struct
42+
s16_s = s16_b_t.struct
43+
s32_s = s32_b_t.struct
44+
s64_s = s64_b_t.struct
45+
f32_s = f32_b_t.struct
46+
f64_s = f64_b_t.struct
47+
3748

3849
def pack_stream(stream, obj):
3950
if isinstance(obj, dict):
@@ -141,63 +152,96 @@ def pack_stream(stream, obj):
141152
raise TypeError("type not supported:", obj, type(obj))
142153

143154

144-
def unpack_stream(stream):
145-
first_byte = u8_b_unpack(stream)
155+
def unpack_stream(stream, offset=0):
156+
(first_byte,) = u8_s.unpack_from(stream, offset)
157+
offset += u8_s.size
146158
if first_byte <= 0x7F:
147159
obj = first_byte
148160
elif first_byte <= 0x8F:
149161
map_length = first_byte & 0b1111
150-
obj = {unpack_stream(stream): unpack_stream(stream) for _ in range(map_length)}
162+
unpack_stream.__offset__ = offset
163+
obj = {
164+
unpack_stream(stream, unpack_stream.__offset__): unpack_stream(
165+
stream, unpack_stream.__offset__
166+
)
167+
for _ in range(map_length)
168+
}
169+
offset = unpack_stream.__offset__
151170
elif first_byte <= 0x9F:
152171
array_length = first_byte & 0b1111
153-
obj = [unpack_stream(stream) for _ in range(array_length)]
172+
unpack_stream.__offset__ = offset
173+
obj = [
174+
unpack_stream(stream, unpack_stream.__offset__) for _ in range(array_length)
175+
]
176+
offset = unpack_stream.__offset__
154177
elif first_byte <= 0xBF:
155178
str_length = first_byte & 0b11111
156-
obj = stream.read(str_length).decode("utf-8")
179+
_offset = offset + str_length
180+
obj = bytes(stream[offset:_offset]).decode("utf-8")
181+
offset = _offset
157182
elif 0xE0 <= first_byte and first_byte <= 0xFF:
158-
stream.seek(-1, 1)
159-
obj = s8_b_unpack(stream)
183+
(obj,) = s8_s.unpack_from(stream, offset - s8_s.size)
184+
# peek backwards, offset is good where it's at
160185
elif first_byte == 0xC0:
161186
obj = None
162187
elif first_byte == 0xC2:
163188
obj = False
164189
elif first_byte == 0xC3:
165190
obj = True
166191
elif first_byte == 0xC4:
167-
bin_length = u8_b_unpack(stream)
168-
obj = stream.read(bin_length)
192+
(bin_length,) = u8_s.unpack_from(stream, offset)
193+
offset += u8_s.size
194+
_offset = offset + bin_length
195+
obj = bytes(stream[offset:_offset])
196+
offset = _offset
169197
elif first_byte == 0xC5:
170-
bin_length = u16_b_unpack(stream)
171-
obj = stream.read(bin_length)
198+
(bin_length,) = u16_s.unpack_from(stream, offset)
199+
offset += u16_s.size
200+
_offset = offset + bin_length
201+
obj = bytes(stream[offset:_offset])
202+
offset = _offset
172203
elif first_byte == 0xC6:
173-
bin_length = u32_b_unpack(stream)
174-
obj = stream.read(bin_length)
204+
(bin_length,) = u32_s.unpack_from(stream, offset)
205+
offset += u32_s.size
206+
_offset = offset + bin_length
207+
obj = bytes(stream[offset:_offset])
208+
offset = _offset
175209
elif first_byte == 0xC7:
176210
raise NotImplementedError
177211
elif first_byte == 0xC8:
178212
raise NotImplementedError
179213
elif first_byte == 0xC9:
180214
raise NotImplementedError
181215
elif first_byte == 0xCA:
182-
obj = f32_b_unpack(stream)
216+
(obj,) = f32_s.unpack_from(stream, offset)
217+
offset += f32_s.size
183218
elif first_byte == 0xCB:
184-
obj = f64_b_unpack(stream)
219+
(obj,) = f64_s.unpack_from(stream, offset)
220+
offset += f64_s.size
185221
elif first_byte == 0xCC:
186-
obj = u8_b_unpack(stream)
222+
(obj,) = u8_s.unpack_from(stream, offset)
223+
offset += u8_s.size
187224
elif first_byte == 0xCD:
188-
obj = u16_b_unpack(stream)
225+
(obj,) = u16_s.unpack_from(stream, offset)
226+
offset += u16_s.size
189227
elif first_byte == 0xCE:
190-
obj = u32_b_unpack(stream)
228+
(obj,) = u32_s.unpack_from(stream, offset)
229+
offset += u32_s.size
191230
elif first_byte == 0xCF:
192-
obj = u64_b_unpack(stream)
231+
(obj,) = u64_s.unpack_from(stream, offset)
232+
offset += u64_s.size
193233
elif first_byte == 0xD0:
194-
obj = s8_b_unpack(stream)
234+
(obj,) = s8_s.unpack_from(stream, offset)
235+
offset += s8_s.size
195236
elif first_byte == 0xD1:
196-
obj = s16_b_unpack(stream)
237+
(obj,) = s16_s.unpack_from(stream, offset)
238+
offset += s16_s.size
197239
elif first_byte == 0xD2:
198-
obj = s32_b_unpack(stream)
240+
(obj,) = s32_s.unpack_from(stream, offset)
241+
offset += s32_s.size
199242
elif first_byte == 0xD3:
200-
obj = s64_b_unpack(stream)
243+
(obj,) = s64_s.unpack_from(stream, offset)
244+
offset += s64_s.size
201245
elif first_byte == 0xD4:
202246
raise NotImplementedError
203247
elif first_byte == 0xD5:
@@ -209,27 +253,63 @@ def unpack_stream(stream):
209253
elif first_byte == 0xD8:
210254
raise NotImplementedError
211255
elif first_byte == 0xD9:
212-
str_length = u8_b_unpack(stream)
213-
obj = stream.read(str_length).decode("utf-8")
256+
(str_length,) = u8_s.unpack_from(stream, offset)
257+
offset += u8_s.size
258+
_offset = offset + str_length
259+
obj = bytes(stream[offset:_offset]).decode("utf-8")
260+
offset = _offset
214261
elif first_byte == 0xDA:
215-
str_length = u16_b_unpack(stream)
216-
obj = stream.read(str_length).decode("utf-8")
262+
(str_length,) = u16_s.unpack_from(stream, offset)
263+
offset += u16_s.size
264+
_offset = offset + str_length
265+
obj = bytes(stream[offset:_offset]).decode("utf-8")
266+
offset = _offset
217267
elif first_byte == 0xDB:
218-
str_length = u32_b_unpack(stream)
219-
obj = stream.read(str_length).decode("utf-8")
268+
(str_length,) = u32_s.unpack_from(stream, offset)
269+
offset += u32_s.size
270+
_offset = offset + str_length
271+
obj = bytes(stream[offset:_offset]).decode("utf-8")
272+
offset = _offset
220273
elif first_byte == 0xDC:
221-
array_length = u16_b_unpack(stream)
222-
obj = [unpack_stream(stream) for _ in range(array_length)]
274+
(array_length,) = u16_s.unpack_from(stream, offset)
275+
offset += u16_s.size
276+
unpack_stream.__offset__ = offset
277+
obj = [
278+
unpack_stream(stream, unpack_stream.__offset__) for _ in range(array_length)
279+
]
280+
offset = unpack_stream.__offset__
223281
elif first_byte == 0xDD:
224-
array_length = u32_b_unpack(stream)
225-
obj = [unpack_stream(stream) for _ in range(array_length)]
282+
(array_length,) = u32_s.unpack_from(stream, offset)
283+
offset += u32_s.size
284+
unpack_stream.__offset__ = offset
285+
obj = [
286+
unpack_stream(stream, unpack_stream.__offset__) for _ in range(array_length)
287+
]
288+
offset = unpack_stream.__offset__
226289
elif first_byte == 0xDE:
227-
map_length = u16_b_unpack(stream)
228-
obj = {unpack_stream(stream): unpack_stream(stream) for _ in range(map_length)}
290+
(map_length,) = u16_s.unpack_from(stream, offset)
291+
offset += u16_s.size
292+
unpack_stream.__offset__ = offset
293+
obj = {
294+
unpack_stream(stream, unpack_stream.__offset__): unpack_stream(
295+
stream, offset=unpack_stream.__offset__
296+
)
297+
for _ in range(map_length)
298+
}
299+
offset = unpack_stream.__offset__
229300
elif first_byte == 0xDF:
230-
map_length = u32_b_unpack(stream)
231-
obj = {unpack_stream(stream): unpack_stream(stream) for _ in range(map_length)}
301+
(map_length,) = u32_s.unpack_from(stream, offset)
302+
offset += u32_s.size
303+
unpack_stream.__offset__ = offset
304+
obj = {
305+
unpack_stream(stream, unpack_stream.__offset__): unpack_stream(
306+
stream, offset=unpack_stream.__offset__
307+
)
308+
for _ in range(map_length)
309+
}
310+
offset = unpack_stream.__offset__
232311
else:
233312
raise ValueError("invalid first byte", first_byte, hex(first_byte))
234313

314+
unpack_stream.__offset__ = offset
235315
return obj

tests/test_stream.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,9 @@
2323

2424

2525
def test_stream():
26-
stream = io.BytesIO()
27-
pack_stream(stream, obj)
28-
pack_stream(stream, obj)
29-
stream.seek(0)
30-
assert unpack_stream(stream) == obj
31-
assert unpack_stream(stream) == obj
32-
stream.close()
26+
with io.BytesIO() as stream:
27+
pack_stream(stream, obj)
28+
pack_stream(stream, obj)
29+
with stream.getbuffer() as mv:
30+
assert unpack_stream(mv) == obj
31+
assert unpack_stream(mv) == obj

0 commit comments

Comments
 (0)