pycoder49
diff --git a/‎clients/openai-python/tests/conftest.py‎
Lines changed: 22 additions & 0 deletions b/‎clients/openai-python/tests/conftest.py‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎clients/openai-python/tests/test_embeddings.py‎
Lines changed: 213 additions & 0 deletions b/‎clients/openai-python/tests/test_embeddings.py‎
Lines changed: 213 additions & 0 deletions
diff --git a/‎clients/openai-python/tests/test_openai_compatibility.py‎
Lines changed: 1 addition & 10 deletions b/‎clients/openai-python/tests/test_openai_compatibility.py‎
Lines changed: 1 addition & 10 deletions
diff --git a/‎gateway/src/main.rs‎
Lines changed: 4 additions & 0 deletions b/‎gateway/src/main.rs‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎internal/tensorzero-node/lib/bindings/EmbeddingModelConfig.ts‎
Lines changed: 4 additions & 2 deletions b/‎internal/tensorzero-node/lib/bindings/EmbeddingModelConfig.ts‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎internal/tensorzero-node/lib/bindings/EmbeddingProviderInfo.ts‎
Lines changed: 9 additions & 0 deletions b/‎internal/tensorzero-node/lib/bindings/EmbeddingProviderInfo.ts‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎internal/tensorzero-node/lib/bindings/index.ts‎
Lines changed: 1 addition & 0 deletions b/‎internal/tensorzero-node/lib/bindings/index.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tensorzero-core/src/cache.rs‎
Lines changed: 3 additions & 3 deletions b/‎tensorzero-core/src/cache.rs‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎tensorzero-core/src/config_parser/tests.rs‎
Lines changed: 1 addition & 1 deletion b/‎tensorzero-core/src/config_parser/tests.rs‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,22 @@
+# type: ignore
+"""
+Shared test fixtures for TensorZero OpenAI client tests
+"""
+
+import os
+
+import pytest_asyncio
+from openai import AsyncOpenAI
+
+TEST_CONFIG_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "../../../tensorzero-core/tests/e2e/tensorzero.toml",
+)
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    async with AsyncOpenAI(
+        api_key="donotuse", base_url="http://localhost:3000/openai/v1"
+    ) as client:
+        yield client
@@ -0,0 +1,213 @@
+# type: ignore
+"""
+Tests for the TensorZero embeddings API using the OpenAI Python client
+
+These tests cover the embeddings functionality of the TensorZero OpenAI-compatible interface.
+
+To run:
+```
+pytest tests/test_embeddings.py
+```
+or
+```
+uv run pytest tests/test_embeddings.py
+```
+"""
+
+import pytest
+
+
+@pytest.mark.asyncio
+async def test_basic_embeddings(async_client):
+    """Test basic embeddings generation with a single input"""
+    result = await async_client.embeddings.create(
+        input="Hello, world!",
+        model="text-embedding-3-small",
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == 1
+    assert result.data[0].index == 0
+    assert result.data[0].object == "embedding"
+    assert len(result.data[0].embedding) > 0  # Should have embedding vector
+    assert result.usage.prompt_tokens > 0
+    assert result.usage.total_tokens > 0
+
+
+@pytest.mark.asyncio
+async def test_basic_embeddings_shorthand(async_client):
+    """Test basic embeddings generation using the openai:: shorthand model name"""
+    result = await async_client.embeddings.create(
+        input="Hello, world!",
+        model="openai::text-embedding-3-large",
+    )
+
+    # Verify the response structure
+    assert result.model == "openai::text-embedding-3-large"
+    assert len(result.data) == 1
+    assert result.data[0].index == 0
+    assert result.data[0].object == "embedding"
+    assert len(result.data[0].embedding) > 0  # Should have embedding vector
+    assert result.usage.prompt_tokens > 0
+    assert result.usage.total_tokens > 0
+
+
+@pytest.mark.asyncio
+async def test_batch_embeddings(async_client):
+    """Test embeddings generation with multiple inputs"""
+    inputs = [
+        "Hello, world!",
+        "How are you today?",
+        "This is a test of batch embeddings.",
+    ]
+
+    result = await async_client.embeddings.create(
+        input=inputs,
+        model="text-embedding-3-small",
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == len(inputs)
+
+    for i, embedding_data in enumerate(result.data):
+        assert embedding_data.index == i
+        assert embedding_data.object == "embedding"
+        assert len(embedding_data.embedding) > 0
+
+    assert result.usage.prompt_tokens > 0
+    assert result.usage.total_tokens > 0
+
+
+@pytest.mark.asyncio
+async def test_embeddings_with_dimensions(async_client):
+    """Test embeddings with specified dimensions"""
+    result = await async_client.embeddings.create(
+        input="Test with specific dimensions",
+        model="text-embedding-3-small",
+        dimensions=512,
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == 1
+    # Should match requested dimensions
+    assert len(result.data[0].embedding) == 512
+
+
+@pytest.mark.asyncio
+async def test_embeddings_with_encoding_format_float(async_client):
+    """Test embeddings with the float encoding format"""
+    result = await async_client.embeddings.create(
+        input="Test encoding format",
+        model="text-embedding-3-small",
+        encoding_format="float",
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == 1
+    assert isinstance(result.data[0].embedding[0], float)
+
+
+@pytest.mark.asyncio
+async def test_embeddings_with_encoding_format_base64(async_client):
+    """Test embeddings with the base64 encoding format"""
+    result = await async_client.embeddings.create(
+        input="Test encoding format",
+        model="text-embedding-3-small",
+        encoding_format="base64",
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == 1
+    assert isinstance(result.data[0].embedding, str)
+
+
+@pytest.mark.asyncio
+async def test_embeddings_with_user_parameter(async_client):
+    """Test embeddings with user parameter for tracking"""
+    user_id = "test_user_123"
+    result = await async_client.embeddings.create(
+        input="Test with user parameter",
+        model="text-embedding-3-small",
+        user=user_id,
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == 1
+    assert len(result.data[0].embedding) > 0
+
+
+@pytest.mark.asyncio
+async def test_embeddings_invalid_model_error(async_client):
+    """Test that invalid model name raises appropriate error"""
+    with pytest.raises(Exception) as exc_info:
+        await async_client.embeddings.create(
+            input="Test invalid model",
+            model="tensorzero::model_name::nonexistent_model",
+        )
+
+    # Should get a 404 error for unknown model
+    assert exc_info.value.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_embeddings_large_batch(async_client):
+    """Test embeddings with a larger batch of inputs"""
+    # Create a batch of 10 different inputs
+    inputs = [f"This is test input number {i + 1}" for i in range(10)]
+
+    result = await async_client.embeddings.create(
+        input=inputs,
+        model="text-embedding-3-small",
+    )
+
+    # Verify the response structure
+    assert result.model == "text-embedding-3-small"
+    assert len(result.data) == 10
+
+    # Verify each embedding
+    for i, embedding_data in enumerate(result.data):
+        assert embedding_data.index == i
+        assert embedding_data.object == "embedding"
+        assert len(embedding_data.embedding) > 0
+
+    assert result.usage.prompt_tokens > 0
+    assert result.usage.total_tokens > 0
+
+
+@pytest.mark.asyncio
+async def test_embeddings_consistency(async_client):
+    """Test that the same input produces consistent embeddings"""
+    input_text = "This is a consistency test"
+
+    # Generate embeddings twice with the same input
+    result1 = await async_client.embeddings.create(
+        input=input_text,
+        model="text-embedding-3-small",
+    )
+
+    result2 = await async_client.embeddings.create(
+        input=input_text,
+        model="text-embedding-3-small",
+    )
+
+    # Both should have the same model and structure
+    assert result1.model == result2.model
+    assert len(result1.data) == len(result2.data) == 1
+    assert len(result1.data[0].embedding) == len(result2.data[0].embedding)
+
+    # The embeddings should be (nearly) identical for the same input; exact
+    # equality is not required, so only approximate agreement is asserted below
+    embedding1 = result1.data[0].embedding
+    embedding2 = result2.data[0].embedding
+
+    # Check that embeddings are similar (allowing for small numerical differences)
+    for i in range(min(10, len(embedding1))):  # Check first 10 dimensions
+        assert abs(embedding1[i] - embedding2[i]) < 0.01, (
+            f"Embeddings differ significantly at index {i}"
+        )
@@ -27,8 +27,7 @@
 from uuid import UUID
 
 import pytest
-import pytest_asyncio
-from openai import AsyncOpenAI, BadRequestError
+from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 from uuid_utils.compat import uuid7
 
@@ -38,14 +37,6 @@
 )
 
 
-@pytest_asyncio.fixture
-async def async_client():
-    async with AsyncOpenAI(
-        api_key="donotuse", base_url="http://localhost:3000/openai/v1"
-    ) as client:
-        yield client
-
-
 @pytest.mark.asyncio
 async def test_async_basic_inference(async_client):
     messages = [
 
@@ -228,6 +228,10 @@ async fn main() {
             "/openai/v1/chat/completions",
             post(endpoints::openai_compatible::inference_handler),
         )
+        .route(
+            "/openai/v1/embeddings",
+            post(endpoints::openai_compatible::embeddings_handler),
+        )
         .route("/feedback", post(endpoints::feedback::feedback_handler))
         // Everything above this layer has OpenTelemetry tracing enabled
         // Note - we do *not* attach a `OtelInResponseLayer`, as this seems to be incorrect according to the W3C Trace Context spec
 
@@ -1,7 +1,9 @@
 // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
-import type { EmbeddingProviderConfig } from "./EmbeddingProviderConfig";
+import type { EmbeddingProviderInfo } from "./EmbeddingProviderInfo";
+import type { TimeoutsConfig } from "./TimeoutsConfig";
 
 export type EmbeddingModelConfig = {
   routing: Array<string>;
-  providers: { [key in string]?: EmbeddingProviderConfig };
+  providers: { [key in string]?: EmbeddingProviderInfo };
+  timeouts: TimeoutsConfig;
 };
@@ -0,0 +1,9 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { EmbeddingProviderConfig } from "./EmbeddingProviderConfig";
+import type { TimeoutsConfig } from "./TimeoutsConfig";
+
+export type EmbeddingProviderInfo = {
+  inner: EmbeddingProviderConfig;
+  timeouts: TimeoutsConfig;
+  provider_name: string;
+};
@@ -32,6 +32,7 @@ export * from "./DynamicJSONSchema";
 export * from "./DynamicToolConfig";
 export * from "./EmbeddingModelConfig";
 export * from "./EmbeddingProviderConfig";
+export * from "./EmbeddingProviderInfo";
 export * from "./EvaluationConfig";
 export * from "./EvaluatorConfig";
 export * from "./ExactMatchConfig";
 
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use crate::clickhouse::{ClickHouseConnectionInfo, TableName};
-use crate::embeddings::{EmbeddingRequest, EmbeddingResponse};
+use crate::embeddings::{EmbeddingModelResponse, EmbeddingRequest};
 use crate::error::{warn_discarded_cache_write, Error, ErrorDetails};
 use crate::inference::types::file::serialize_with_file_data;
 use crate::inference::types::{
@@ -387,14 +387,14 @@ pub async fn embedding_cache_lookup(
     clickhouse_connection_info: &ClickHouseConnectionInfo,
     request: &EmbeddingModelProviderRequest<'_>,
     max_age_s: Option<u32>,
-) -> Result<Option<EmbeddingResponse>, Error> {
+) -> Result<Option<EmbeddingModelResponse>, Error> {
     let result = cache_lookup_inner::<EmbeddingCacheData>(
         clickhouse_connection_info,
         request.get_cache_key()?,
         max_age_s,
     )
     .await?;
-    Ok(result.map(|result| EmbeddingResponse::from_cache(result, request)))
+    Ok(result.map(|result| EmbeddingModelResponse::from_cache(result, request)))
 }
 
 pub async fn cache_lookup(
 
@@ -124,7 +124,7 @@ async fn test_config_from_toml_table_valid() {
     assert_eq!(embedding_model.routing, vec!["openai".into()]);
     assert_eq!(embedding_model.providers.len(), 1);
     let provider = embedding_model.providers.get("openai").unwrap();
-    assert!(matches!(provider, EmbeddingProviderConfig::OpenAI(_)));
+    assert!(matches!(provider.inner, EmbeddingProviderConfig::OpenAI(_)));
 
     // Check that the function for the LLM Judge evaluation is added to the functions table
     let function = config