Skip to content

Commit d46c711

Browse files
perf(client): optimize file structure copying in multipart requests
1 parent e91bc38 commit d46c711

9 files changed

Lines changed: 181 additions & 99 deletions

File tree

src/kernel/_files.py

Lines changed: 53 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@
33
import io
44
import os
55
import pathlib
6-
from typing import overload
7-
from typing_extensions import TypeGuard
6+
from typing import Sequence, cast, overload
7+
from typing_extensions import TypeVar, TypeGuard
88

99
import anyio
1010

@@ -17,7 +17,9 @@
1717
HttpxFileContent,
1818
HttpxRequestFiles,
1919
)
20-
from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
20+
from ._utils import is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t
21+
22+
_T = TypeVar("_T")
2123

2224

2325
def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
@@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
121123
return await anyio.Path(file).read_bytes()
122124

123125
return file
126+
127+
128+
def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T:
    """Return a copy of `item` in which only the containers on `paths` are fresh objects.

    This protects the caller's data from being mutated by extract_files
    while avoiding a copy of the whole structure. Dicts and lists that a
    path passes through are copied; every other value is shared with the
    input by reference.

    Args:
        item: The structure to copy selectively.
        paths: Key sequences describing where mutation may occur. The
            literal key "<array>" stands for "every entry of a list".

    Returns:
        `item` itself when nothing lies on a path, otherwise a structure
        whose on-path containers are new objects.

    Example, with paths=[["foo", "files", "file"]] and the structure:
        {
            "foo": {
                "bar": {"baz": {}},
                "files": {"file": <content>}
            }
        }
    the root dict, "foo" and "files" are on the path and are copied,
    while "bar" and "baz" are off the path and are returned by reference.
    """
    # Walk every path from its first segment (depth 0).
    copied = _deepcopy_with_paths(item, paths, 0)
    return copied
146+
147+
148+
def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T:
    """Recursive worker for `deepcopy_with_paths`.

    Args:
        item: The node currently being visited.
        paths: The paths that reach this node.
        index: Position within each path that corresponds to this node's keys.

    Returns:
        `item` itself when no path continues through it, otherwise a shallow
        copy of the container whose on-path children were processed recursively.
    """
    # Nothing reaches this node, so it can be shared as-is.
    if not paths:
        return item

    if is_mapping(item):
        # Bucket the paths by the key they select at this depth; paths that
        # are already exhausted at this depth select nothing and are dropped.
        grouped: dict[str, list[Sequence[str]]] = {}
        for candidate in paths:
            if len(candidate) > index:
                grouped.setdefault(candidate[index], []).append(candidate)

        # No path continues through this mapping, so it will not be mutated
        # and copying it would be wasted work.
        if not grouped:
            return item

        shallow = dict(item)
        for name, remaining in grouped.items():
            if name not in shallow:
                continue
            shallow[name] = _deepcopy_with_paths(shallow[name], remaining, index + 1)
        return cast(_T, shallow)

    if is_list(item):
        # Only paths that expect a list at this depth can lead to mutation inside it.
        through_array = [
            candidate for candidate in paths if len(candidate) > index and candidate[index] == "<array>"
        ]
        if through_array:
            return cast(_T, [_deepcopy_with_paths(element, through_array, index + 1) for element in item])

        # No path expects a list here; return the same list by reference.
        return cast(_T, item)

    # Any other value is never copied.
    return item

src/kernel/_utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
coerce_integer as coerce_integer,
2525
file_from_path as file_from_path,
2626
strip_not_given as strip_not_given,
27-
deepcopy_minimal as deepcopy_minimal,
2827
get_async_library as get_async_library,
2928
maybe_coerce_float as maybe_coerce_float,
3029
get_required_header as get_required_header,

src/kernel/_utils/_utils.py

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -177,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
177177
return isinstance(obj, Iterable)
178178

179179

180-
def deepcopy_minimal(item: _T) -> _T:
181-
"""Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
182-
183-
- mappings, e.g. `dict`
184-
- list
185-
186-
This is done for performance reasons.
187-
"""
188-
if is_mapping(item):
189-
return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
190-
if is_list(item):
191-
return cast(_T, [deepcopy_minimal(entry) for entry in item])
192-
return item
193-
194-
195180
# copied from https://github.com/Rapptz/RoboDanny
196181
def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
197182
size = len(seq)

src/kernel/resources/browsers/browsers.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,9 @@
4949
ReplaysResourceWithStreamingResponse,
5050
AsyncReplaysResourceWithStreamingResponse,
5151
)
52+
from ..._files import deepcopy_with_paths
5253
from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
53-
from ..._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
54+
from ..._utils import extract_files, path_template, maybe_transform, async_maybe_transform
5455
from .computer import (
5556
ComputerResource,
5657
AsyncComputerResource,
@@ -573,7 +574,7 @@ def load_extensions(
573574
if not id:
574575
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
575576
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
576-
body = deepcopy_minimal({"extensions": extensions})
577+
body = deepcopy_with_paths({"extensions": extensions}, [["extensions", "<array>", "zip_file"]])
577578
files = extract_files(cast(Mapping[str, object], body), paths=[["extensions", "<array>", "zip_file"]])
578579
# It should be noted that the actual Content-Type header that will be
579580
# sent to the server will contain a `boundary` parameter, e.g.
@@ -1075,7 +1076,7 @@ async def load_extensions(
10751076
if not id:
10761077
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
10771078
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
1078-
body = deepcopy_minimal({"extensions": extensions})
1079+
body = deepcopy_with_paths({"extensions": extensions}, [["extensions", "<array>", "zip_file"]])
10791080
files = extract_files(cast(Mapping[str, object], body), paths=[["extensions", "<array>", "zip_file"]])
10801081
# It should be noted that the actual Content-Type header that will be
10811082
# sent to the server will contain a `boundary` parameter, e.g.

src/kernel/resources/browsers/fs/fs.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
WatchResourceWithStreamingResponse,
1616
AsyncWatchResourceWithStreamingResponse,
1717
)
18-
from ...._files import read_file_content, async_read_file_content
18+
from ...._files import read_file_content, deepcopy_with_paths, async_read_file_content
1919
from ...._types import (
2020
Body,
2121
Omit,
@@ -30,7 +30,7 @@
3030
omit,
3131
not_given,
3232
)
33-
from ...._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
33+
from ...._utils import extract_files, path_template, maybe_transform, async_maybe_transform
3434
from ...._compat import cached_property
3535
from ...._resource import SyncAPIResource, AsyncAPIResource
3636
from ...._response import (
@@ -509,7 +509,7 @@ def upload(
509509
if not id:
510510
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
511511
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
512-
body = deepcopy_minimal({"files": files})
512+
body = deepcopy_with_paths({"files": files}, [["files", "<array>", "file"]])
513513
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>", "file"]])
514514
# It should be noted that the actual Content-Type header that will be
515515
# sent to the server will contain a `boundary` parameter, e.g.
@@ -555,11 +555,12 @@ def upload_zip(
555555
if not id:
556556
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
557557
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
558-
body = deepcopy_minimal(
558+
body = deepcopy_with_paths(
559559
{
560560
"dest_path": dest_path,
561561
"zip_file": zip_file,
562-
}
562+
},
563+
[["zip_file"]],
563564
)
564565
files = extract_files(cast(Mapping[str, object], body), paths=[["zip_file"]])
565566
# It should be noted that the actual Content-Type header that will be
@@ -1071,7 +1072,7 @@ async def upload(
10711072
if not id:
10721073
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
10731074
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
1074-
body = deepcopy_minimal({"files": files})
1075+
body = deepcopy_with_paths({"files": files}, [["files", "<array>", "file"]])
10751076
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>", "file"]])
10761077
# It should be noted that the actual Content-Type header that will be
10771078
# sent to the server will contain a `boundary` parameter, e.g.
@@ -1117,11 +1118,12 @@ async def upload_zip(
11171118
if not id:
11181119
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
11191120
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
1120-
body = deepcopy_minimal(
1121+
body = deepcopy_with_paths(
11211122
{
11221123
"dest_path": dest_path,
11231124
"zip_file": zip_file,
1124-
}
1125+
},
1126+
[["zip_file"]],
11251127
)
11261128
files = extract_files(cast(Mapping[str, object], body), paths=[["zip_file"]])
11271129
# It should be noted that the actual Content-Type header that will be

src/kernel/resources/deployments.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,9 @@
88
import httpx
99

1010
from ..types import deployment_list_params, deployment_create_params, deployment_follow_params
11+
from .._files import deepcopy_with_paths
1112
from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, FileTypes, omit, not_given
12-
from .._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
13+
from .._utils import extract_files, path_template, maybe_transform, async_maybe_transform
1314
from .._compat import cached_property
1415
from .._resource import SyncAPIResource, AsyncAPIResource
1516
from .._response import (
@@ -95,7 +96,7 @@ def create(
9596
9697
timeout: Override the client-level default timeout for this request, in seconds
9798
"""
98-
body = deepcopy_minimal(
99+
body = deepcopy_with_paths(
99100
{
100101
"entrypoint_rel_path": entrypoint_rel_path,
101102
"env_vars": env_vars,
@@ -104,7 +105,8 @@ def create(
104105
"region": region,
105106
"source": source,
106107
"version": version,
107-
}
108+
},
109+
[["file"]],
108110
)
109111
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
110112
# It should be noted that the actual Content-Type header that will be
@@ -360,7 +362,7 @@ async def create(
360362
361363
timeout: Override the client-level default timeout for this request, in seconds
362364
"""
363-
body = deepcopy_minimal(
365+
body = deepcopy_with_paths(
364366
{
365367
"entrypoint_rel_path": entrypoint_rel_path,
366368
"env_vars": env_vars,
@@ -369,7 +371,8 @@ async def create(
369371
"region": region,
370372
"source": source,
371373
"version": version,
372-
}
374+
},
375+
[["file"]],
373376
)
374377
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
375378
# It should be noted that the actual Content-Type header that will be

src/kernel/resources/extensions.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,9 @@
88
import httpx
99

1010
from ..types import extension_upload_params, extension_download_from_chrome_store_params
11+
from .._files import deepcopy_with_paths
1112
from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, FileTypes, omit, not_given
12-
from .._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
13+
from .._utils import extract_files, path_template, maybe_transform, async_maybe_transform
1314
from .._compat import cached_property
1415
from .._resource import SyncAPIResource, AsyncAPIResource
1516
from .._response import (
@@ -220,11 +221,12 @@ def upload(
220221
221222
timeout: Override the client-level default timeout for this request, in seconds
222223
"""
223-
body = deepcopy_minimal(
224+
body = deepcopy_with_paths(
224225
{
225226
"file": file,
226227
"name": name,
227-
}
228+
},
229+
[["file"]],
228230
)
229231
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
230232
# It should be noted that the actual Content-Type header that will be
@@ -429,11 +431,12 @@ async def upload(
429431
430432
timeout: Override the client-level default timeout for this request, in seconds
431433
"""
432-
body = deepcopy_minimal(
434+
body = deepcopy_with_paths(
433435
{
434436
"file": file,
435437
"name": name,
436-
}
438+
},
439+
[["file"]],
437440
)
438441
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
439442
# It should be noted that the actual Content-Type header that will be

tests/test_deepcopy.py

Lines changed: 0 additions & 58 deletions
This file was deleted.

0 commit comments

Comments
 (0)