Skip to content

Commit 1f9eed3

Browse files
perf(client): optimize file structure copying in multipart requests
1 parent bc21229 commit 1f9eed3

8 files changed

Lines changed: 167 additions & 113 deletions

File tree

src/anthropic/_files.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import io
44
import os
55
import pathlib
6-
from typing import overload
7-
from typing_extensions import TypeGuard
6+
from typing import Sequence, cast, overload
7+
from typing_extensions import TypeVar, TypeGuard
88

99
import anyio
1010

@@ -17,7 +17,9 @@
1717
HttpxFileContent,
1818
HttpxRequestFiles,
1919
)
20-
from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
20+
from ._utils import is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t
21+
22+
_T = TypeVar("_T")
2123

2224

2325
def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
@@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
121123
return await anyio.Path(file).read_bytes()
122124

123125
return file
126+
127+
128+
def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T:
    """Return a copy of `item` in which only containers lying on `paths` are duplicated.

    This protects the caller's data from mutation by `extract_files` while
    avoiding a copy of the whole structure. Dicts and lists that sit on one of
    the given paths are replaced by fresh containers; every other value is
    handed back by reference.

    Args:
        item: The structure to copy selectively.
        paths: Key sequences describing where files live, e.g.
            `[["foo", "files", "file"]]`. The segment `"<array>"` stands for
            "every element of the list found at this position".

    Returns:
        `item` itself when nothing needs copying, otherwise a structure whose
        on-path containers are new objects.

    Example:
        With `paths=[["foo", "files", "file"]]` and the structure

            {
                "foo": {
                    "bar": {"baz": {}},
                    "files": {"file": <content>},
                }
            }

        the root dict, `"foo"` and `"files"` are copied because they lie on the
        path, whereas `"bar"` and `"baz"` are off the path and are shared with
        the input.
    """
    # Depth 0 means the first segment of every path applies to `item` itself.
    return _deepcopy_with_paths(item, paths, index=0)
146+
147+
148+
def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T:
    """Recursive worker behind `deepcopy_with_paths`.

    Args:
        item: The value found at the current depth.
        paths: The paths that are still being followed through `item`.
        index: Position within each path that corresponds to `item`'s children.

    Returns:
        `item` unchanged when no path continues through it, otherwise a
        shallow copy of the container whose on-path children were processed
        recursively.
    """
    if not paths:
        return item

    if is_mapping(item):
        # Bucket the paths by the key they name at this depth. Paths that are
        # already exhausted (too short for `index`) simply drop out.
        grouped: dict[str, list[Sequence[str]]] = {}
        for candidate in paths:
            if len(candidate) > index:
                segment = candidate[index]
                if segment not in grouped:
                    grouped[segment] = []
                grouped[segment].append(candidate)

        if grouped:
            copied = dict(item)
            for segment, remaining in grouped.items():
                # Keys named by a path but absent from the mapping are ignored.
                if segment in copied:
                    copied[segment] = _deepcopy_with_paths(copied[segment], remaining, index + 1)
            return cast(_T, copied)

        # No path continues through this mapping, so it won't be mutated and
        # copying it would be redundant.
        return item

    if is_list(item):
        # Only paths that explicitly expect a list at this position apply.
        through_array = [p for p in paths if len(p) > index and p[index] == "<array>"]
        if through_array:
            return cast(_T, [_deepcopy_with_paths(element, through_array, index + 1) for element in item])

        # Nothing inside this list will be mutated - hand it back by reference.
        return cast(_T, item)

    return item

src/anthropic/_utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
coerce_integer as coerce_integer,
2525
file_from_path as file_from_path,
2626
strip_not_given as strip_not_given,
27-
deepcopy_minimal as deepcopy_minimal,
2827
get_async_library as get_async_library,
2928
maybe_coerce_float as maybe_coerce_float,
3029
get_required_header as get_required_header,

src/anthropic/_utils/_utils.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
177177
return isinstance(obj, Iterable)
178178

179179

180-
def deepcopy_minimal(item: _T) -> _T:
181-
"""Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
182-
183-
- mappings, e.g. `dict`
184-
- list
185-
186-
This is done for performance reasons.
187-
"""
188-
if is_mapping(item):
189-
return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
190-
if is_list(item):
191-
return cast(_T, [deepcopy_minimal(entry) for entry in item])
192-
return item
193-
194-
195180
# copied from https://github.com/Rapptz/RoboDanny
196181
def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
197182
size = len(seq)

src/anthropic/resources/beta/files.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,9 @@
88
import httpx
99

1010
from ... import _legacy_response
11+
from ..._files import deepcopy_with_paths
1112
from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
12-
from ..._utils import (
13-
is_given,
14-
extract_files,
15-
path_template,
16-
maybe_transform,
17-
strip_not_given,
18-
deepcopy_minimal,
19-
async_maybe_transform,
20-
)
13+
from ..._utils import is_given, extract_files, path_template, maybe_transform, strip_not_given, async_maybe_transform
2114
from ..._compat import cached_property
2215
from ..._resource import SyncAPIResource, AsyncAPIResource
2316
from ..._response import (
@@ -326,7 +319,7 @@ def upload(
326319
}
327320
extra_headers = {"anthropic-beta": "files-api-2025-04-14", **(extra_headers or {})}
328321
extra_headers = {**_stainless_helper_header_from_file(file), **extra_headers}
329-
body = deepcopy_minimal({"file": file})
322+
body = deepcopy_with_paths({"file": file}, [["file"]])
330323
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
331324
# It should be noted that the actual Content-Type header that will be
332325
# sent to the server will contain a `boundary` parameter, e.g.
@@ -626,7 +619,7 @@ async def upload(
626619
}
627620
extra_headers = {"anthropic-beta": "files-api-2025-04-14", **(extra_headers or {})}
628621
extra_headers = {**_stainless_helper_header_from_file(file), **extra_headers}
629-
body = deepcopy_minimal({"file": file})
622+
body = deepcopy_with_paths({"file": file}, [["file"]])
630623
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
631624
# It should be noted that the actual Content-Type header that will be
632625
# sent to the server will contain a `boundary` parameter, e.g.

src/anthropic/resources/beta/skills/skills.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
VersionsWithStreamingResponse,
1717
AsyncVersionsWithStreamingResponse,
1818
)
19+
from ...._files import deepcopy_with_paths
1920
from ...._types import (
2021
Body,
2122
Omit,
@@ -27,15 +28,7 @@
2728
omit,
2829
not_given,
2930
)
30-
from ...._utils import (
31-
is_given,
32-
extract_files,
33-
path_template,
34-
maybe_transform,
35-
strip_not_given,
36-
deepcopy_minimal,
37-
async_maybe_transform,
38-
)
31+
from ...._utils import is_given, extract_files, path_template, maybe_transform, strip_not_given, async_maybe_transform
3932
from ...._compat import cached_property
4033
from ...._resource import SyncAPIResource, AsyncAPIResource
4134
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -123,11 +116,12 @@ def create(
123116
**(extra_headers or {}),
124117
}
125118
extra_headers = {"anthropic-beta": "skills-2025-10-02", **(extra_headers or {})}
126-
body = deepcopy_minimal(
119+
body = deepcopy_with_paths(
127120
{
128121
"display_title": display_title,
129122
"files": files,
130-
}
123+
},
124+
[["files", "<array>"]],
131125
)
132126
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
133127
# It should be noted that the actual Content-Type header that will be
@@ -394,11 +388,12 @@ async def create(
394388
**(extra_headers or {}),
395389
}
396390
extra_headers = {"anthropic-beta": "skills-2025-10-02", **(extra_headers or {})}
397-
body = deepcopy_minimal(
391+
body = deepcopy_with_paths(
398392
{
399393
"display_title": display_title,
400394
"files": files,
401-
}
395+
},
396+
[["files", "<array>"]],
402397
)
403398
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
404399
# It should be noted that the actual Content-Type header that will be

src/anthropic/resources/beta/skills/versions.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import httpx
99

1010
from .... import _legacy_response
11+
from ...._files import deepcopy_with_paths
1112
from ...._types import (
1213
Body,
1314
Omit,
@@ -19,15 +20,7 @@
1920
omit,
2021
not_given,
2122
)
22-
from ...._utils import (
23-
is_given,
24-
extract_files,
25-
path_template,
26-
maybe_transform,
27-
strip_not_given,
28-
deepcopy_minimal,
29-
async_maybe_transform,
30-
)
23+
from ...._utils import is_given, extract_files, path_template, maybe_transform, strip_not_given, async_maybe_transform
3124
from ...._compat import cached_property
3225
from ...._resource import SyncAPIResource, AsyncAPIResource
3326
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -112,7 +105,7 @@ def create(
112105
**(extra_headers or {}),
113106
}
114107
extra_headers = {"anthropic-beta": "skills-2025-10-02", **(extra_headers or {})}
115-
body = deepcopy_minimal({"files": files})
108+
body = deepcopy_with_paths({"files": files}, [["files", "<array>"]])
116109
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
117110
# It should be noted that the actual Content-Type header that will be
118111
# sent to the server will contain a `boundary` parameter, e.g.
@@ -384,7 +377,7 @@ async def create(
384377
**(extra_headers or {}),
385378
}
386379
extra_headers = {"anthropic-beta": "skills-2025-10-02", **(extra_headers or {})}
387-
body = deepcopy_minimal({"files": files})
380+
body = deepcopy_with_paths({"files": files}, [["files", "<array>"]])
388381
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
389382
# It should be noted that the actual Content-Type header that will be
390383
# sent to the server will contain a `boundary` parameter, e.g.

tests/test_deepcopy.py

Lines changed: 0 additions & 58 deletions
This file was deleted.

0 commit comments

Comments
 (0)