Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
## Release History

## [Unreleased]

#### Features Added
* Embeddings and chat clients can now be injected via the new `embeddings_client`
and `chat_client` constructor arguments on `CosmosMemoryClient` and
`AsyncCosmosMemoryClient`. When supplied, the toolkit uses the provided client
instead of building an Azure-backed one, and does not close it (the caller owns
its lifecycle, mirroring the existing credential-ownership behavior). This enables
OpenAI-compatible / self-hosted embedding and chat backends, reuse of a
caller-configured client, and deterministic offline testing (for example against
the Cosmos DB emulator).

## [0.2.0b1] (2026-06-30)

#### Features Added
Expand Down
48 changes: 33 additions & 15 deletions azure/cosmos/agent_memory/aio/cosmos_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ def __init__(
chat_deployment_name: str = "gpt-4o-mini",
use_default_credential: bool = True,
enable_turn_embeddings: Optional[bool] = None,
embeddings_client: Optional[Any] = None,
chat_client: Optional[Any] = None,
processor: Optional[AsyncMemoryProcessor] = None,
transcript_metadata_keys: Optional[Iterable[str]] = None,
) -> None:
Expand Down Expand Up @@ -113,19 +115,33 @@ def __init__(
)
self._background_tasks: set[asyncio.Task[Any]] = set()
self._pipeline_init_error: Exception | None = None
self._embeddings_client = AsyncEmbeddingsClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._embedding_deployment_name,
dimensions=self._embedding_dimensions,
)
self._chat_client = AsyncChatClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._chat_deployment_name,
)
# Embeddings/chat clients may be injected (e.g. an OpenAI-compatible backend, a
# caller-configured client, or a deterministic fake for offline tests). When a client
# is injected the caller owns its lifecycle, so the toolkit does not close it; otherwise
# the toolkit builds the Azure-backed client and closes it in ``close()``.
if embeddings_client is not None:
self._embeddings_client = embeddings_client
self._owns_embeddings_client = False
else:
self._embeddings_client = AsyncEmbeddingsClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._embedding_deployment_name,
dimensions=self._embedding_dimensions,
)
self._owns_embeddings_client = True
if chat_client is not None:
self._chat_client = chat_client
self._owns_chat_client = False
else:
self._chat_client = AsyncChatClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._chat_deployment_name,
)
self._owns_chat_client = True
self._pipeline: Optional[AsyncPipelineService] = None
self._processor: Optional[AsyncMemoryProcessor] = processor
self._processor_explicit = processor is not None
Expand Down Expand Up @@ -157,8 +173,10 @@ async def close(self) -> None:
if self._processor is not None and not self._processor_explicit:
await self._close_maybe_async(self._processor)
self._processor = None
await self._embeddings_client.close()
await self._close_maybe_async(self._chat_client)
if self._owns_embeddings_client:
await self._close_maybe_async(self._embeddings_client)
if self._owns_chat_client:
await self._close_maybe_async(self._chat_client)
for owns, cred in (
(self._owns_cosmos_credential, self._cosmos_credential),
(self._owns_ai_foundry_credential, self._ai_foundry_credential),
Expand Down
48 changes: 33 additions & 15 deletions azure/cosmos/agent_memory/cosmos_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def __init__(
chat_deployment_name: str = "gpt-4o-mini",
use_default_credential: bool = True,
enable_turn_embeddings: Optional[bool] = None,
embeddings_client: Optional[Any] = None,
chat_client: Optional[Any] = None,
processor: Optional[MemoryProcessor] = None,
transcript_metadata_keys: Optional[Iterable[str]] = None,
) -> None:
Expand All @@ -105,19 +107,33 @@ def __init__(
use_default_credential=use_default_credential,
enable_turn_embeddings=enable_turn_embeddings,
)
self._embeddings_client = EmbeddingsClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._embedding_deployment_name,
dimensions=self._embedding_dimensions,
)
self._chat_client = ChatClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._chat_deployment_name,
)
# Embeddings/chat clients may be injected (e.g. an OpenAI-compatible backend, a
# caller-configured client, or a deterministic fake for offline tests). When a client
# is injected the caller owns its lifecycle, so the toolkit does not close it; otherwise
# the toolkit builds the Azure-backed client and closes it in ``close()``.
if embeddings_client is not None:
self._embeddings_client = embeddings_client
self._owns_embeddings_client = False
else:
self._embeddings_client = EmbeddingsClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._embedding_deployment_name,
dimensions=self._embedding_dimensions,
)
self._owns_embeddings_client = True
if chat_client is not None:
self._chat_client = chat_client
self._owns_chat_client = False
else:
self._chat_client = ChatClient(
endpoint=self._ai_foundry_endpoint,
credential=self._ai_foundry_credential,
api_key=self._ai_foundry_api_key,
model=self._chat_deployment_name,
)
self._owns_chat_client = True
self._pipeline: Optional[PipelineService] = None
self._processor: Optional[MemoryProcessor] = processor
self._processor_explicit = processor is not None
Expand Down Expand Up @@ -146,8 +162,10 @@ def close(self) -> None:
if self._processor is not None and not self._processor_explicit:
self._close_sync_closeable(self._processor)
self._processor = None
self._close_sync_closeable(self._chat_client)
self._close_sync_closeable(self._embeddings_client)
if self._owns_chat_client:
self._close_sync_closeable(self._chat_client)
if self._owns_embeddings_client:
self._close_sync_closeable(self._embeddings_client)
for owns, cred in (
(self._owns_cosmos_credential, self._cosmos_credential),
(self._owns_ai_foundry_credential, self._ai_foundry_credential),
Expand Down
64 changes: 64 additions & 0 deletions tests/unit/aio/test_cosmos_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,70 @@ def test_default_credential_enabled(self):
assert mem._cosmos_credential is not None


# ===================================================================
# Injected embeddings / chat clients
# ===================================================================


class _FakeEmbeddings:
    """Minimal stand-in for AsyncEmbeddingsClient used to verify injection.

    The fake performs no embedding work; it only records whether ``close()``
    was awaited, so a test can check that an injected embeddings client is
    left open after the memory client is closed.
    """

    def __init__(self) -> None:
        # Set to True only by close(); tests read this after ``mem.close()``.
        self.closed = False

    async def close(self) -> None:
        """Record that the fake was closed."""
        self.closed = True


class _FakeChat:
    """Minimal stand-in for AsyncChatClient used to verify injection.

    The fake performs no chat work; it only records whether ``close()`` was
    awaited, so a test can check that an injected chat client is left open
    after the memory client is closed.
    """

    def __init__(self) -> None:
        # Set to True only by close(); tests read this after ``mem.close()``.
        self.closed = False

    async def close(self) -> None:
        """Record that the fake was closed."""
        self.closed = True


class TestInjectedModelClients:
    """Injection of embeddings/chat clients into the async memory client.

    Covers the constructor's ownership bookkeeping: an injected client is
    stored as-is and flagged as not owned, a missing client is built by the
    toolkit and flagged as owned, and ``close()`` leaves injected clients open.
    """

    def test_injected_clients_are_used_and_not_owned(self):
        emb = _FakeEmbeddings()
        chat = _FakeChat()
        mem = _make_client(embeddings_client=emb, chat_client=chat)

        # Identity checks: the exact injected objects must be stored, not copies.
        assert mem._embeddings_client is emb
        assert mem._chat_client is chat
        assert mem._owns_embeddings_client is False
        assert mem._owns_chat_client is False

    def test_default_clients_are_built_and_owned(self):
        mem = _make_client()

        assert mem._embeddings_client is not None
        assert mem._chat_client is not None
        assert mem._owns_embeddings_client is True
        assert mem._owns_chat_client is True

    def test_clients_can_be_injected_independently(self):
        emb = _FakeEmbeddings()
        mem = _make_client(embeddings_client=emb)

        assert mem._embeddings_client is emb
        assert mem._owns_embeddings_client is False
        # Chat client was not injected, so the toolkit builds and owns it.
        assert mem._owns_chat_client is True

    def test_chat_client_can_be_injected_without_embeddings_client(self):
        # Mirror of the embeddings-only case above: the two constructor
        # branches are independent, so each direction is checked separately.
        chat = _FakeChat()
        mem = _make_client(chat_client=chat)

        assert mem._chat_client is chat
        assert mem._owns_chat_client is False
        # Embeddings client was not injected, so the toolkit builds and owns it.
        assert mem._embeddings_client is not None
        assert mem._owns_embeddings_client is True

    async def test_close_does_not_close_injected_clients(self):
        emb = _FakeEmbeddings()
        chat = _FakeChat()
        mem = _make_client(embeddings_client=emb, chat_client=chat)

        await mem.close()

        # The caller owns injected clients, so close() must leave them open.
        assert emb.closed is False
        assert chat.closed is False


# ===================================================================
# Local CRUD (synchronous)
# ===================================================================
Expand Down
64 changes: 64 additions & 0 deletions tests/unit/test_cosmos_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,70 @@ def test_no_credential_when_flag_false(self):
assert mem._ai_foundry_credential is None


# ===================================================================
# Injected embeddings / chat clients
# ===================================================================


class _FakeEmbeddings:
    """Minimal stand-in for EmbeddingsClient used to verify injection.

    The fake performs no embedding work; it only records whether ``close()``
    was called, so a test can check that an injected embeddings client is
    left open after the memory client is closed.
    """

    def __init__(self) -> None:
        # Set to True only by close(); tests read this after ``mem.close()``.
        self.closed = False

    def close(self) -> None:
        """Record that the fake was closed."""
        self.closed = True


class _FakeChat:
    """Minimal stand-in for ChatClient used to verify injection.

    The fake performs no chat work; it only records whether ``close()`` was
    called, so a test can check that an injected chat client is left open
    after the memory client is closed.
    """

    def __init__(self) -> None:
        # Set to True only by close(); tests read this after ``mem.close()``.
        self.closed = False

    def close(self) -> None:
        """Record that the fake was closed."""
        self.closed = True


class TestInjectedModelClients:
    """Injection of embeddings/chat clients into the sync memory client.

    Covers the constructor's ownership bookkeeping: an injected client is
    stored as-is and flagged as not owned, a missing client is built by the
    toolkit and flagged as owned, and ``close()`` leaves injected clients open.
    """

    def test_injected_clients_are_used_and_not_owned(self):
        emb = _FakeEmbeddings()
        chat = _FakeChat()
        mem = _make_client(embeddings_client=emb, chat_client=chat)

        # Identity checks: the exact injected objects must be stored, not copies.
        assert mem._embeddings_client is emb
        assert mem._chat_client is chat
        assert mem._owns_embeddings_client is False
        assert mem._owns_chat_client is False

    def test_default_clients_are_built_and_owned(self):
        mem = _make_client()

        assert mem._embeddings_client is not None
        assert mem._chat_client is not None
        assert mem._owns_embeddings_client is True
        assert mem._owns_chat_client is True

    def test_clients_can_be_injected_independently(self):
        emb = _FakeEmbeddings()
        mem = _make_client(embeddings_client=emb)

        assert mem._embeddings_client is emb
        assert mem._owns_embeddings_client is False
        # Chat client was not injected, so the toolkit builds and owns it.
        assert mem._owns_chat_client is True

    def test_chat_client_can_be_injected_without_embeddings_client(self):
        # Mirror of the embeddings-only case above: the two constructor
        # branches are independent, so each direction is checked separately.
        chat = _FakeChat()
        mem = _make_client(chat_client=chat)

        assert mem._chat_client is chat
        assert mem._owns_chat_client is False
        # Embeddings client was not injected, so the toolkit builds and owns it.
        assert mem._embeddings_client is not None
        assert mem._owns_embeddings_client is True

    def test_close_does_not_close_injected_clients(self):
        emb = _FakeEmbeddings()
        chat = _FakeChat()
        mem = _make_client(embeddings_client=emb, chat_client=chat)

        mem.close()

        # The caller owns injected clients, so close() must leave them open.
        assert emb.closed is False
        assert chat.closed is False


# ===================================================================
# Local CRUD
# ===================================================================
Expand Down
Loading