From 2e9636ee98616f92c10d3c7c9d9eb2ea5b52c94d Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sat, 11 Apr 2026 16:42:24 +0200 Subject: [PATCH 1/3] feat: pool tokens --- docs/learn/how-it-works.md | 9 +++++++++ rationai/resources/models.py | 22 ++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/learn/how-it-works.md b/docs/learn/how-it-works.md index d4366df..485f702 100644 --- a/docs/learn/how-it-works.md +++ b/docs/learn/how-it-works.md @@ -37,11 +37,20 @@ The request path is literally the `model` string you pass in, joined to `models_ Important: the SDK sends only the raw pixel bytes. It does **not** send image metadata such as width/height/shape, color space, file name, or format. +For `client.models.embed_image(model, image, ...)`: + +1. The input image is serialized with `image.tobytes()`. +2. Bytes are compressed with **LZ4 frame**. +3. The request includes `x-output-dtype` to let the service return the desired numeric type. +4. Additional keyword headers are supported and sent as `x-*` headers (e.g. `pool_tokens="false"` becomes `x-pool-tokens: false`; do not include the `x_` prefix in the argument name). + ### What the SDK expects back - **Classification**: JSON (`response.json()`), typically a float (binary) or a mapping of class → probability. - **Segmentation**: a binary payload (response body) that is LZ4-compressed float16 data. The SDK decompresses it, interprets it as `np.float16`, and reshapes it to `(num_classes, height, width)`. +- **Embedding**: an LZ4-compressed binary payload plus an `x-output-shape` header, + used to reshape the output array. The SDK determines `height` and `width` from the input image: diff --git a/rationai/resources/models.py b/rationai/resources/models.py index 3a297a7..bc5dde0 100644 --- a/rationai/resources/models.py +++ b/rationai/resources/models.py @@ -70,6 +70,7 @@ def embed_image[DType: np.generic]( image: Image | NDArray[np.uint8], output_dtype: type[DType] = np.float32, # type: ignore[assignment] timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT, + **headers: str, ) -> NDArray[DType]: """Compute an embedding vector for an image using the specified model. @@ -78,16 +79,24 @@ def embed_image[DType: np.generic]( image: The image to embed. It must be uint8 RGB image. output_dtype: Output numpy dtype for embeddings (e.g. np.float16, np.float32). timeout: Optional timeout for the request. + **headers: Additional x- headers. Keyword underscores are converted + to hyphens, e.g. pool_tokens="false" becomes + pool-tokens: false. Returns: NDArray[DType]: The embedding array reshaped according to the `x-output-shape` response header. """ compressed_data = lz4.frame.compress(image.tobytes()) + request_headers = {"x-output-dtype": np.dtype(output_dtype).name} + request_headers.update( + {f"x-{k.replace('_', '-')}": v for k, v in headers.items()} + ) + response = self._post( model, data=compressed_data, - headers={"x-output-dtype": np.dtype(output_dtype).name}, + headers=request_headers, timeout=timeout, ) response.raise_for_status() @@ -160,6 +169,7 @@ async def embed_image[DType: np.generic]( image: Image | NDArray[np.uint8], output_dtype: type[DType] = np.float32, # type: ignore[assignment] timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT, + **headers: str, ) -> NDArray[DType]: """Compute an embedding vector for an image using the specified model. @@ -168,16 +178,24 @@ async def embed_image[DType: np.generic]( image: The image to embed. It must be uint8 RGB image. output_dtype: Output numpy dtype for embeddings (e.g. np.float16, np.float32). timeout: Optional timeout for the request. + **headers: Additional x- headers. Keyword underscores are converted + to hyphens, e.g. pool_tokens="false" becomes + pool-tokens: false. Returns: NDArray[DType]: The embedding array reshaped according to the `x-output-shape` response header. """ compressed_data = lz4.frame.compress(image.tobytes()) + request_headers = {"x-output-dtype": np.dtype(output_dtype).name} + request_headers.update( + {f"x-{k.replace('_', '-')}": v for k, v in headers.items()} + ) + response = await self._post( model, data=compressed_data, - headers={"x-output-dtype": np.dtype(output_dtype).name}, + headers=request_headers, timeout=timeout, ) response.raise_for_status() From 56a82626b9305e289b1262a5d05682fdb091e44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20=C5=A0t=C3=ADpek?= <91186480+Jurgee@users.noreply.github.com> Date: Sat, 11 Apr 2026 16:45:24 +0200 Subject: [PATCH 2/3] Update rationai/resources/models.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- rationai/resources/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rationai/resources/models.py b/rationai/resources/models.py index bc5dde0..cb66485 100644 --- a/rationai/resources/models.py +++ b/rationai/resources/models.py @@ -80,8 +80,8 @@ def embed_image[DType: np.generic]( output_dtype: Output numpy dtype for embeddings (e.g. np.float16, np.float32). timeout: Optional timeout for the request. **headers: Additional x- headers. Keyword underscores are converted - to hyphens, e.g. pool_tokens="false" becomes - pool-tokens: false. + to hyphens and prefixed with 'x-', e.g. pool_tokens="false" + becomes x-pool-tokens: false. Returns: NDArray[DType]: The embedding array reshaped according to From b5cef7ecf30e98c1251c7fbdd480389f348e9fc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20=C5=A0t=C3=ADpek?= <91186480+Jurgee@users.noreply.github.com> Date: Sat, 11 Apr 2026 16:45:32 +0200 Subject: [PATCH 3/3] Update rationai/resources/models.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- rationai/resources/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rationai/resources/models.py b/rationai/resources/models.py index cb66485..a7d3c25 100644 --- a/rationai/resources/models.py +++ b/rationai/resources/models.py @@ -179,8 +179,8 @@ async def embed_image[DType: np.generic]( output_dtype: Output numpy dtype for embeddings (e.g. np.float16, np.float32). timeout: Optional timeout for the request. **headers: Additional x- headers. Keyword underscores are converted - to hyphens, e.g. pool_tokens="false" becomes - pool-tokens: false. + to hyphens and prefixed with 'x-', e.g. pool_tokens="false" + becomes x-pool-tokens: false. Returns: NDArray[DType]: The embedding array reshaped according to