diff --git a/docs/learn/how-it-works.md b/docs/learn/how-it-works.md
index d4366df..485f702 100644
--- a/docs/learn/how-it-works.md
+++ b/docs/learn/how-it-works.md
@@ -37,11 +37,20 @@ The request path is literally the `model` string you pass in, joined to `models_
 
 Important: the SDK sends only the raw pixel bytes. It does **not** send image metadata such as width/height/shape, color space, file name, or format.
 
+For `client.models.embed_image(model, image, ...)`:
+
+1. The input image is serialized with `image.tobytes()`.
+2. Bytes are compressed with **LZ4 frame**.
+3. The request includes an `x-output-dtype` header (the NumPy dtype name of `output_dtype`, e.g. `float32`) so the service returns the embedding in the requested numeric type.
+4. Any extra keyword arguments are sent as `x-*` request headers: underscores in the argument name become hyphens and `x-` is prepended (e.g. `pool_tokens="false"` is sent as `x-pool-tokens: false`), so do not include an `x_` prefix in the argument name.
+
 ### What the SDK expects back
 
 - **Classification**: JSON (`response.json()`), typically a float (binary) or a mapping of class → probability.
 - **Segmentation**: a binary payload (response body) that is LZ4-compressed float16 data.
   The SDK decompresses it, interprets it as `np.float16`, and reshapes it to `(num_classes, height, width)`.
+- **Embedding**: an LZ4-compressed binary payload plus an `x-output-shape` response header,
+  which the SDK uses to reshape the output array.
 
 The SDK determines `height` and `width` from the input image:
 
diff --git a/rationai/resources/models.py b/rationai/resources/models.py
index 3a297a7..a7d3c25 100644
--- a/rationai/resources/models.py
+++ b/rationai/resources/models.py
@@ -70,6 +70,7 @@ def embed_image[DType: np.generic](
         image: Image | NDArray[np.uint8],
         output_dtype: type[DType] = np.float32,  # type: ignore[assignment]
         timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        **headers: str,
     ) -> NDArray[DType]:
         """Compute an embedding vector for an image using the specified model.
 
@@ -78,16 +79,24 @@ def embed_image[DType: np.generic](
             image: The image to embed. It must be uint8 RGB image.
             output_dtype: Output numpy dtype for embeddings (e.g. np.float16, np.float32).
             timeout: Optional timeout for the request.
+            **headers: Extra request headers passed as keyword arguments. Each
+                name has underscores replaced by hyphens and is prefixed with
+                'x-', e.g. pool_tokens="false" is sent as x-pool-tokens: false.
 
         Returns:
             NDArray[DType]: The embedding array reshaped according to
                 the `x-output-shape` response header.
         """
         compressed_data = lz4.frame.compress(image.tobytes())
+        request_headers = {"x-output-dtype": np.dtype(output_dtype).name}
+        request_headers.update(
+            {f"x-{k.replace('_', '-')}": v for k, v in headers.items()}
+        )
+
         response = self._post(
             model,
             data=compressed_data,
-            headers={"x-output-dtype": np.dtype(output_dtype).name},
+            headers=request_headers,
             timeout=timeout,
         )
         response.raise_for_status()
@@ -160,6 +169,7 @@ async def embed_image[DType: np.generic](
         image: Image | NDArray[np.uint8],
         output_dtype: type[DType] = np.float32,  # type: ignore[assignment]
         timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        **headers: str,
     ) -> NDArray[DType]:
         """Compute an embedding vector for an image using the specified model.
 
@@ -168,16 +178,24 @@ async def embed_image[DType: np.generic](
             image: The image to embed. It must be uint8 RGB image.
             output_dtype: Output numpy dtype for embeddings (e.g. np.float16, np.float32).
             timeout: Optional timeout for the request.
+            **headers: Extra request headers passed as keyword arguments. Each
+                name has underscores replaced by hyphens and is prefixed with
+                'x-', e.g. pool_tokens="false" is sent as x-pool-tokens: false.
 
         Returns:
             NDArray[DType]: The embedding array reshaped according to
                 the `x-output-shape` response header.
         """
         compressed_data = lz4.frame.compress(image.tobytes())
+        request_headers = {"x-output-dtype": np.dtype(output_dtype).name}
+        request_headers.update(
+            {f"x-{k.replace('_', '-')}": v for k, v in headers.items()}
+        )
+
         response = await self._post(
             model,
             data=compressed_data,
-            headers={"x-output-dtype": np.dtype(output_dtype).name},
+            headers=request_headers,
             timeout=timeout,
         )
         response.raise_for_status()