Skip to content

Commit 8d8e727

Browse files
committed
improve performance
1 parent f15d5d4 commit 8d8e727

4 files changed

Lines changed: 431 additions & 190 deletions

File tree

.gitignore

Lines changed: 196 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,203 @@
1-
# Python-generated files
1+
# Byte-compiled / optimized / DLL files
22
__pycache__/
3-
*.py[oc]
3+
*.py[codz]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
411
build/
12+
develop-eggs/
513
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
622
wheels/
7-
*.egg-info
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
828

9-
# Virtual environments
10-
.venv
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other information into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py.cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
1173

74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, when collaborating on a project that has platform-specific dependencies, or dependencies
93+
# without cross-platform support, pipenv may install dependencies that don't work, or fail to
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# UV
98+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
12101
uv.lock
102+
103+
# poetry
104+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105+
# This is especially recommended for binary packages to ensure reproducibility, and is more
106+
# commonly ignored for libraries.
107+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108+
#poetry.lock
109+
#poetry.toml
110+
111+
# pdm
112+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115+
#pdm.lock
116+
#pdm.toml
117+
.pdm-python
118+
.pdm-build/
119+
120+
# pixi
121+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122+
#pixi.lock
123+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124+
# in the .venv directory. It is recommended not to include this directory in version control.
125+
.pixi
126+
127+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128+
__pypackages__/
129+
130+
# Celery stuff
131+
celerybeat-schedule
132+
celerybeat.pid
133+
134+
# SageMath parsed files
135+
*.sage.py
136+
137+
# Environments
138+
.env
139+
.envrc
140+
.venv
141+
env/
142+
venv/
143+
ENV/
144+
env.bak/
145+
venv.bak/
146+
147+
# Spyder project settings
148+
.spyderproject
149+
.spyproject
150+
151+
# Rope project settings
152+
.ropeproject
153+
154+
# mkdocs documentation
155+
/site
156+
157+
# mypy
158+
.mypy_cache/
159+
.dmypy.json
160+
dmypy.json
161+
162+
# Pyre type checker
163+
.pyre/
164+
165+
# pytype static type analyzer
166+
.pytype/
167+
168+
# Cython debug symbols
169+
cython_debug/
170+
171+
# PyCharm
172+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174+
# and can be added to the global gitignore or merged into this file. For a more nuclear
175+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
176+
#.idea/
177+
178+
# Abstra
179+
# Abstra is an AI-powered process automation framework.
180+
# Ignore directories containing user credentials, local state, and settings.
181+
# Learn more at https://abstra.io/docs
182+
.abstra/
183+
184+
# Visual Studio Code
185+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
188+
# you could uncomment the following to ignore the entire vscode folder
189+
# .vscode/
190+
191+
# Ruff stuff:
192+
.ruff_cache/
193+
194+
# PyPI configuration file
195+
.pypirc
196+
197+
# Marimo
198+
marimo/_static/
199+
marimo/_lsp/
200+
__marimo__/
201+
202+
# Streamlit
203+
.streamlit/secrets.toml

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@ build-backend = "uv_build"
1717
dev = [
1818
"msgpack>=1.1.1",
1919
"pytest>=8.4.1",
20+
"pytest-cov>=6.2.1",
2021
"ruff>=0.12.8",
2122
]

scripts/bench.py

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,19 @@
33
# set before importing `msgpack`
44
os.environ["MSGPACK_PUREPYTHON"] = "1"
55

6-
from msgpack_stream import unpack, unpack_stream
7-
from mmap import mmap, ACCESS_READ
6+
from msgpack_stream import unpack, unpack_stream, pack, pack_stream
7+
from mmap import mmap, ACCESS_READ, ACCESS_WRITE
88
import argparse
99
import timeit
1010

11-
from msgpack import unpackb
11+
from msgpack import unpackb, packb
1212

1313

1414
FILE = "scripts/obj.msgpack"
1515

1616

1717
def main(mapped):
18-
with open(FILE, "rb", buffering=0) as fd:
18+
with open(FILE, "rb") as fd:
1919
if mapped:
2020
with mmap(fd.fileno(), 0, access=ACCESS_READ) as mm:
2121
return unpack(mm.read())
@@ -24,7 +24,7 @@ def main(mapped):
2424

2525

2626
def stream(mapped):
27-
with open(FILE, "rb", buffering=0) as fd:
27+
with open(FILE, "rb") as fd:
2828
if mapped:
2929
with mmap(fd.fileno(), 0, access=ACCESS_READ) as mm:
3030
return unpack_stream(mm)
@@ -33,29 +33,88 @@ def stream(mapped):
3333

3434

3535
def other(mapped):
36-
with open(FILE, "rb", buffering=0) as fd:
36+
with open(FILE, "rb") as fd:
3737
if mapped:
3838
with mmap(fd.fileno(), 0, access=ACCESS_READ) as mm:
3939
return unpackb(mm.read(), strict_map_key=False)
4040
else:
4141
return unpackb(fd.read(), strict_map_key=False)
4242

4343

44+
def serialize_main(obj, mapped):
45+
with open("temp", "wb") as fd:
46+
if mapped:
47+
with mmap(-1, 3955122, access=ACCESS_WRITE) as mm:
48+
mm.write(pack(obj))
49+
else:
50+
fd.write(pack(obj))
51+
52+
53+
def serialize_stream(obj, mapped):
54+
with open("temp", "wb") as fd:
55+
if mapped:
56+
with mmap(-1, 3955122, access=ACCESS_WRITE) as mm:
57+
pack_stream(mm, obj)
58+
else:
59+
pack_stream(fd, obj)
60+
61+
62+
def serialize_other(obj, mapped):
63+
with open("temp", "wb") as fd:
64+
if mapped:
65+
with mmap(-1, 3955122, access=ACCESS_WRITE) as mm:
66+
mm.write(packb(obj))
67+
else:
68+
fd.write(packb(obj))
69+
70+
4471
if __name__ == "__main__":
72+
parser = argparse.ArgumentParser()
73+
parser.add_argument("-n", "--number", type=int, default=25, help="Number of runs")
74+
parser.add_argument(
75+
"-m", "--mapped", action="store_true", help="Use memory mapping"
76+
)
77+
args = parser.parse_args()
78+
4579
_globals = {
4680
"main": main,
4781
"stream": stream,
4882
"other": other,
83+
"mapped": args.mapped,
84+
}
85+
86+
_serialize = {
87+
"main": serialize_main,
88+
"stream": serialize_stream,
89+
"other": serialize_other,
90+
"obj": stream(False),
91+
"mapped": args.mapped,
4992
}
50-
parser = argparse.ArgumentParser()
51-
parser.add_argument("-n", "--number", type=int, default=25, help="Number of runs")
52-
args = parser.parse_args()
5393

54-
t_main = timeit.timeit("main(True)", number=args.number, globals=_globals)
55-
# this needs to be mmap for good performance
56-
t_stream = timeit.timeit("stream(True)", number=args.number, globals=_globals)
57-
t_other = timeit.timeit("other(True)", number=args.number, globals=_globals)
94+
t_main = timeit.timeit("main(mapped)", number=args.number, globals=_globals)
95+
t_stream = timeit.timeit("stream(mapped)", number=args.number, globals=_globals)
96+
t_other = timeit.timeit("other(mapped)", number=args.number, globals=_globals)
5897

5998
print(f"main: {t_main:.6f}s total, {t_main / args.number:.6f}s per call")
6099
print(f"stream: {t_stream:.6f}s total, {t_stream / args.number:.6f}s per call")
61100
print(f"other: {t_other:.6f}s total, {t_other / args.number:.6f}s per call")
101+
102+
t_main_s = timeit.timeit(
103+
"main(obj, mapped)", number=args.number, globals=_serialize
104+
)
105+
t_stream_s = timeit.timeit(
106+
"stream(obj, mapped)", number=args.number, globals=_serialize
107+
)
108+
t_other_s = timeit.timeit(
109+
"other(obj, mapped)", number=args.number, globals=_serialize
110+
)
111+
112+
print(
113+
f"main serialize: {t_main_s:.6f}s total, {t_main_s / args.number:.6f}s per call"
114+
)
115+
print(
116+
f"stream serialize: {t_stream_s:.6f}s total, {t_stream_s / args.number:.6f}s per call"
117+
)
118+
print(
119+
f"other serialize: {t_other_s:.6f}s total, {t_other_s / args.number:.6f}s per call"
120+
)

0 commit comments

Comments
 (0)