Skip to content

Commit 13b57e6

Browse files
authored
Merge branch 'main' into feat/configurable-embed-batch-size
2 parents 2d337a3 + 0d97d48 commit 13b57e6

12 files changed

Lines changed: 148 additions & 183 deletions

.fern/metadata.json

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,7 @@
99
"fastavro": "^1.9.4",
1010
"requests": "^2.0.0",
1111
"types-requests": "^2.0.0",
12-
"tokenizers": ">=0.15,<1",
13-
"httpx-sse": "^0.4.0"
12+
"tokenizers": ">=0.15,<1"
1413
},
1514
"improved_imports": true,
1615
"pydantic_config": {

poetry.lock

Lines changed: 111 additions & 120 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ name = "cohere"
33

44
[tool.poetry]
55
name = "cohere"
6-
version = "5.20.1"
6+
version = "5.20.2"
77
description = ""
88
readme = "README.md"
99
authors = []
@@ -38,7 +38,6 @@ Repository = 'https://github.com/cohere-ai/cohere-python'
3838
python = "^3.9"
3939
fastavro = "^1.9.4"
4040
httpx = ">=0.21.2"
41-
httpx-sse = "^0.4.0"
4241
pydantic = ">= 1.9.2"
4342
pydantic-core = ">=2.18.2"
4443
requests = "^2.0.0"

reference.md

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1615,7 +1615,7 @@ client.rerank(
16151615
],
16161616
query="What is the capital of the United States?",
16171617
top_n=3,
1618-
model="rerank-v3.5",
1618+
model="rerank-v4.0-pro",
16191619
)
16201620

16211621
```
@@ -2492,10 +2492,7 @@ If tool_choice isn't specified, then the model is free to choose whether to use
24922492
<dl>
24932493
<dd>
24942494

2495-
**priority:** `typing.Optional[int]`
2496-
2497-
The priority of the request (lower means earlier handling; default 0 highest priority).
2498-
Higher priority requests are handled first, and dropped last when the system is under load.
2495+
**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
24992496

25002497
</dd>
25012498
</dl>
@@ -2793,10 +2790,7 @@ If tool_choice isn't specified, then the model is free to choose whether to use
27932790
<dl>
27942791
<dd>
27952792

2796-
**priority:** `typing.Optional[int]`
2797-
2798-
The priority of the request (lower means earlier handling; default 0 highest priority).
2799-
Higher priority requests are handled first, and dropped last when the system is under load.
2793+
**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
28002794

28012795
</dd>
28022796
</dl>
@@ -2972,10 +2966,7 @@ If `NONE` is selected, when the input exceeds the maximum input token length an
29722966
<dl>
29732967
<dd>
29742968

2975-
**priority:** `typing.Optional[int]`
2976-
2977-
The priority of the request (lower means earlier handling; default 0 highest priority).
2978-
Higher priority requests are handled first, and dropped last when the system is under load.
2969+
**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
29792970

29802971
</dd>
29812972
</dl>
@@ -3038,7 +3029,7 @@ client.v2.rerank(
30383029
],
30393030
query="What is the capital of the United States?",
30403031
top_n=3,
3041-
model="rerank-v3.5",
3032+
model="rerank-v4.0-pro",
30423033
)
30433034

30443035
```
@@ -3102,10 +3093,7 @@ For optimal performance we recommend against sending more than 1,000 documents i
31023093
<dl>
31033094
<dd>
31043095

3105-
**priority:** `typing.Optional[int]`
3106-
3107-
The priority of the request (lower means earlier handling; default 0 highest priority).
3108-
Higher priority requests are handled first, and dropped last when the system is under load.
3096+
**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
31093097

31103098
</dd>
31113099
</dl>

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
fastavro==1.9.4
22
httpx>=0.21.2
3-
httpx-sse==0.4.0
43
pydantic>= 1.9.2
54
pydantic-core>=2.18.2
65
requests==2.0.0

src/cohere/base_client.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1309,7 +1309,7 @@ def rerank(
13091309
],
13101310
query="What is the capital of the United States?",
13111311
top_n=3,
1312-
model="rerank-v3.5",
1312+
model="rerank-v4.0-pro",
13131313
)
13141314
"""
13151315
_response = self._raw_client.rerank(
@@ -2859,7 +2859,7 @@ async def main() -> None:
28592859
],
28602860
query="What is the capital of the United States?",
28612861
top_n=3,
2862-
model="rerank-v3.5",
2862+
model="rerank-v4.0-pro",
28632863
)
28642864
28652865

src/cohere/core/client_wrapper.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,10 @@ def __init__(
2424

2525
def get_headers(self) -> typing.Dict[str, str]:
2626
headers: typing.Dict[str, str] = {
27-
"User-Agent": "cohere/5.20.1",
27+
"User-Agent": "cohere/5.20.2",
2828
"X-Fern-Language": "Python",
2929
"X-Fern-SDK-Name": "cohere",
30-
"X-Fern-SDK-Version": "5.20.1",
30+
"X-Fern-SDK-Version": "5.20.2",
3131
**(self.get_custom_headers() or {}),
3232
}
3333
if self._client_name is not None:

src/cohere/types/api_meta_billed_units.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,11 @@ class ApiMetaBilledUnits(UncheckedBaseModel):
1818
The number of billed input tokens.
1919
"""
2020

21+
image_tokens: typing.Optional[float] = pydantic.Field(default=None)
22+
"""
23+
The number of billed image tokens.
24+
"""
25+
2126
output_tokens: typing.Optional[float] = pydantic.Field(default=None)
2227
"""
2328
The number of billed output tokens.

src/cohere/v2/client.py

Lines changed: 10 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -160,8 +160,7 @@ def chat_stream(
160160
thinking : typing.Optional[Thinking]
161161
162162
priority : typing.Optional[int]
163-
The priority of the request (lower means earlier handling; default 0 highest priority).
164-
Higher priority requests are handled first, and dropped last when the system is under load.
163+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
165164
166165
request_options : typing.Optional[RequestOptions]
167166
Request-specific configuration.
@@ -331,8 +330,7 @@ def chat(
331330
thinking : typing.Optional[Thinking]
332331
333332
priority : typing.Optional[int]
334-
The priority of the request (lower means earlier handling; default 0 highest priority).
335-
Higher priority requests are handled first, and dropped last when the system is under load.
333+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
336334
337335
request_options : typing.Optional[RequestOptions]
338336
Request-specific configuration.
@@ -451,8 +449,7 @@ def embed(
451449
If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
452450
453451
priority : typing.Optional[int]
454-
The priority of the request (lower means earlier handling; default 0 highest priority).
455-
Higher priority requests are handled first, and dropped last when the system is under load.
452+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
456453
457454
request_options : typing.Optional[RequestOptions]
458455
Request-specific configuration.
@@ -650,8 +647,7 @@ def rerank(
650647
Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
651648
652649
priority : typing.Optional[int]
653-
The priority of the request (lower means earlier handling; default 0 highest priority).
654-
Higher priority requests are handled first, and dropped last when the system is under load.
650+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
655651
656652
request_options : typing.Optional[RequestOptions]
657653
Request-specific configuration.
@@ -679,7 +675,7 @@ def rerank(
679675
],
680676
query="What is the capital of the United States?",
681677
top_n=3,
682-
model="rerank-v3.5",
678+
model="rerank-v4.0-pro",
683679
)
684680
"""
685681
_response = self._raw_client.rerank(
@@ -825,8 +821,7 @@ async def chat_stream(
825821
thinking : typing.Optional[Thinking]
826822
827823
priority : typing.Optional[int]
828-
The priority of the request (lower means earlier handling; default 0 highest priority).
829-
Higher priority requests are handled first, and dropped last when the system is under load.
824+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
830825
831826
request_options : typing.Optional[RequestOptions]
832827
Request-specific configuration.
@@ -1005,8 +1000,7 @@ async def chat(
10051000
thinking : typing.Optional[Thinking]
10061001
10071002
priority : typing.Optional[int]
1008-
The priority of the request (lower means earlier handling; default 0 highest priority).
1009-
Higher priority requests are handled first, and dropped last when the system is under load.
1003+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
10101004
10111005
request_options : typing.Optional[RequestOptions]
10121006
Request-specific configuration.
@@ -1133,8 +1127,7 @@ async def embed(
11331127
If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
11341128
11351129
priority : typing.Optional[int]
1136-
The priority of the request (lower means earlier handling; default 0 highest priority).
1137-
Higher priority requests are handled first, and dropped last when the system is under load.
1130+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
11381131
11391132
request_options : typing.Optional[RequestOptions]
11401133
Request-specific configuration.
@@ -1219,8 +1212,7 @@ async def rerank(
12191212
Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
12201213
12211214
priority : typing.Optional[int]
1222-
The priority of the request (lower means earlier handling; default 0 highest priority).
1223-
Higher priority requests are handled first, and dropped last when the system is under load.
1215+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
12241216
12251217
request_options : typing.Optional[RequestOptions]
12261218
Request-specific configuration.
@@ -1253,7 +1245,7 @@ async def main() -> None:
12531245
],
12541246
query="What is the capital of the United States?",
12551247
top_n=3,
1256-
model="rerank-v3.5",
1248+
model="rerank-v4.0-pro",
12571249
)
12581250
12591251

src/cohere/v2/raw_client.py

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -169,8 +169,7 @@ def chat_stream(
169169
thinking : typing.Optional[Thinking]
170170
171171
priority : typing.Optional[int]
172-
The priority of the request (lower means earlier handling; default 0 highest priority).
173-
Higher priority requests are handled first, and dropped last when the system is under load.
172+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
174173
175174
request_options : typing.Optional[RequestOptions]
176175
Request-specific configuration.
@@ -513,8 +512,7 @@ def chat(
513512
thinking : typing.Optional[Thinking]
514513
515514
priority : typing.Optional[int]
516-
The priority of the request (lower means earlier handling; default 0 highest priority).
517-
Higher priority requests are handled first, and dropped last when the system is under load.
515+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
518516
519517
request_options : typing.Optional[RequestOptions]
520518
Request-specific configuration.
@@ -782,8 +780,7 @@ def embed(
782780
If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
783781
784782
priority : typing.Optional[int]
785-
The priority of the request (lower means earlier handling; default 0 highest priority).
786-
Higher priority requests are handled first, and dropped last when the system is under load.
783+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
787784
788785
request_options : typing.Optional[RequestOptions]
789786
Request-specific configuration.
@@ -1000,8 +997,7 @@ def rerank(
1000997
Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
1001998
1002999
priority : typing.Optional[int]
1003-
The priority of the request (lower means earlier handling; default 0 highest priority).
1004-
Higher priority requests are handled first, and dropped last when the system is under load.
1000+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
10051001
10061002
request_options : typing.Optional[RequestOptions]
10071003
Request-specific configuration.
@@ -1297,8 +1293,7 @@ async def chat_stream(
12971293
thinking : typing.Optional[Thinking]
12981294
12991295
priority : typing.Optional[int]
1300-
The priority of the request (lower means earlier handling; default 0 highest priority).
1301-
Higher priority requests are handled first, and dropped last when the system is under load.
1296+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
13021297
13031298
request_options : typing.Optional[RequestOptions]
13041299
Request-specific configuration.
@@ -1641,8 +1636,7 @@ async def chat(
16411636
thinking : typing.Optional[Thinking]
16421637
16431638
priority : typing.Optional[int]
1644-
The priority of the request (lower means earlier handling; default 0 highest priority).
1645-
Higher priority requests are handled first, and dropped last when the system is under load.
1639+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
16461640
16471641
request_options : typing.Optional[RequestOptions]
16481642
Request-specific configuration.
@@ -1910,8 +1904,7 @@ async def embed(
19101904
If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
19111905
19121906
priority : typing.Optional[int]
1913-
The priority of the request (lower means earlier handling; default 0 highest priority).
1914-
Higher priority requests are handled first, and dropped last when the system is under load.
1907+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
19151908
19161909
request_options : typing.Optional[RequestOptions]
19171910
Request-specific configuration.
@@ -2128,8 +2121,7 @@ async def rerank(
21282121
Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
21292122
21302123
priority : typing.Optional[int]
2131-
The priority of the request (lower means earlier handling; default 0 highest priority).
2132-
Higher priority requests are handled first, and dropped last when the system is under load.
2124+
Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
21332125
21342126
request_options : typing.Optional[RequestOptions]
21352127
Request-specific configuration.

0 commit comments

Comments
 (0)