Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
## Release History

### Unreleased
## [0.2.0b1] (2026-06-30)

#### Features Added
* Raw conversation turns can now be embedded and vector-searched. Set
`enable_turn_embeddings=True` (env `ENABLE_TURN_EMBEDDINGS`) to generate an
embedding when each turn is written, then call `search_turns()` (sync and
async, on both the client and store) to semantically search the raw turn
log. See [PR:#22](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/22/)

#### Other Changes
* The memories container's vector index type is now configurable instead of being
hard-coded to `diskANN`. Set it via the `vector_index_type` argument to
`create_memory_store(...)` or the `AI_FOUNDRY_EMBEDDING_VECTOR_INDEX_TYPE`
environment variable. Allowed values are `diskANN` (default), `quantizedFlat`,
and `flat`. This lets the toolkit run against Cosmos DB accounts without the
DiskANN capability (for example the classic Cosmos DB emulator), enabling
emulator-backed integration test pipelines.
environment variable. See [PR:#24](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/24)
* `ai_foundry_endpoint` now accepts a project-scoped Azure AI Foundry URL
(`https://<resource>.services.ai.azure.com/api/projects/<name>`) in addition
to the account-level inference endpoint. The project path is automatically
stripped to the inference base, so callers can paste whichever form the
Foundry portal shows them without hitting opaque 404s.
to the account-level inference endpoint. See [PR:#23](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/23)

### 0.1.0b2 (2026-06-03)
## [0.1.0b2] (2026-06-03)

#### Bugs Fixed
* Hardened memory extraction: stops emitting phantom/synthesized facts the user never asserted, stops extracting facts from `[assistant]:` turns, stops re-processing already-extracted turns (which previously produced reversed `CONTRADICT` decisions and meta-facts like `"X is contradicted by Y"`), and stops storing near-duplicate episodic memories for the same scope. Episodic memories also now embed the actual content instead of a boilerplate `"intent recorded"` string. See [PR:#20](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/20/)
Expand Down
2 changes: 1 addition & 1 deletion azure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

name: azure-cosmos-agent-memory
metadata:
template: azure-cosmos-agent-memory@0.1.0b2
template: azure-cosmos-agent-memory@0.2.0b1

infra:
provider: bicep
Expand Down
11 changes: 6 additions & 5 deletions azure/cosmos/agent_memory/aio/cosmos_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,11 +709,12 @@ async def search_turns(
) -> list[dict[str, Any]]:
"""Vector-search the raw conversation log (requires turn embeddings).

Searches the turns container directly. Turns are strictly thread-scoped
and only vector-searchable when ``enable_turn_embeddings`` was set when
the turns were written. ``user_id`` is required so the search stays
within a single partition rather than fanning out across every user's
raw turns.
Searches the turns container directly. Only vector-searchable when
``enable_turn_embeddings`` was set when the turns were written.
``user_id`` is required and always filters the results. Passing
``thread_id`` as well scopes the search to a single partition; omitting
it fans out across partitions, filtered by ``user_id`` in the WHERE
clause.
"""
return await self._get_store().search_turns(
search_terms=search_terms,
Expand Down
15 changes: 10 additions & 5 deletions azure/cosmos/agent_memory/aio/store/memory_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
MemoryConflictError,
MemoryNotFoundError,
MemoryTypeMismatchError,
ValidationError,
)
from azure.cosmos.agent_memory.logging import get_logger
from azure.cosmos.agent_memory.models import MemoryRecord
Expand Down Expand Up @@ -864,6 +865,7 @@ async def search(
async def search_turns(
self,
search_terms: Optional[str] = None,
user_id: Optional[str] = None,
thread_id: Optional[str] = None,
role: Optional[str] = None,
hybrid_search: bool = False,
Expand All @@ -874,16 +876,19 @@ async def search_turns(
created_after: Optional[str | datetime] = None,
created_before: Optional[str | datetime] = None,
*,
user_id: str,
query: Optional[str] = None,
) -> list[dict[str, Any]]:
"""Search raw conversation turns using vector similarity with optional hybrid ranking.

Turns are strictly thread-scoped and only vector-searchable when turn
embeddings were enabled at write time (see ``enable_turn_embeddings``).
``user_id`` is required so the query is scoped to a single partition
instead of a cross-partition scan over every user's raw turns.
Only vector-searchable when turn embeddings were enabled at write time
(see ``enable_turn_embeddings``). ``user_id`` is required and always
filters the results. When ``thread_id`` is also supplied the query
targets a single partition; when it is omitted the query fans out
across partitions and is filtered by ``user_id`` in the WHERE clause.

"""
if not user_id:
raise ValidationError("user_id is required for search_turns")
terms = require_search_terms(search_terms, query)
_validate_hybrid_search(hybrid_search, terms)
top = top_literal(top_k, name="top_k")
Expand Down
11 changes: 6 additions & 5 deletions azure/cosmos/agent_memory/cosmos_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,11 +672,12 @@ def search_turns(
) -> list[dict[str, Any]]:
"""Vector-search the raw conversation log (requires turn embeddings).

Searches the turns container directly. Turns are strictly thread-scoped
and only vector-searchable when ``enable_turn_embeddings`` was set when
the turns were written. ``user_id`` is required so the search stays
within a single partition rather than fanning out across every user's
raw turns.
Searches the turns container directly. Only vector-searchable when
``enable_turn_embeddings`` was set when the turns were written.
``user_id`` is required and always filters the results. Passing
``thread_id`` as well scopes the search to a single partition; omitting
it fans out across partitions, filtered by ``user_id`` in the WHERE
clause.
"""
return self._get_store().search_turns(
search_terms=search_terms,
Expand Down
15 changes: 10 additions & 5 deletions azure/cosmos/agent_memory/store/memory_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
MemoryConflictError,
MemoryNotFoundError,
MemoryTypeMismatchError,
ValidationError,
)
from azure.cosmos.agent_memory.logging import get_logger
from azure.cosmos.agent_memory.models import MemoryRecord
Expand Down Expand Up @@ -901,6 +902,7 @@ def search(
def search_turns(
self,
search_terms: Optional[str] = None,
user_id: Optional[str] = None,
thread_id: Optional[str] = None,
role: Optional[str] = None,
hybrid_search: bool = False,
Expand All @@ -911,16 +913,19 @@ def search_turns(
created_after: Optional[str | datetime] = None,
created_before: Optional[str | datetime] = None,
*,
user_id: str,
query: Optional[str] = None,
) -> list[dict[str, Any]]:
"""Search raw conversation turns using vector similarity with optional hybrid ranking.

Turns are strictly thread-scoped and only vector-searchable when turn
embeddings were enabled at write time (see ``enable_turn_embeddings``).
``user_id`` is required so the query is scoped to a single partition
instead of a cross-partition scan over every user's raw turns.
Only vector-searchable when turn embeddings were enabled at write time
(see ``enable_turn_embeddings``). ``user_id`` is required and always
filters the results. When ``thread_id`` is also supplied the query
targets a single partition; when it is omitted the query fans out
across partitions and is filtered by ``user_id`` in the WHERE clause.

"""
Comment thread
Copilot marked this conversation as resolved.
if not user_id:
raise ValidationError("user_id is required for search_turns")
terms = require_search_terms(search_terms, query)
_validate_hybrid_search(hybrid_search, terms)
top = top_literal(top_k, name="top_k")
Expand Down
2 changes: 1 addition & 1 deletion function_app/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
azure-functions
azure-functions-durable

azure-cosmos-agent-memory==0.1.0b2
azure-cosmos-agent-memory==0.2.0b1
azure-cosmos>=4.16.0
azure-identity>=1.20

Expand Down
3 changes: 0 additions & 3 deletions infra/modules/cosmos.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,6 @@ resource memoriesSummariesContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDa
}
]
excludedPaths: [
{
path: '/embedding/?'
}
{
path: '/source_memory_ids/*'
}
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespaces = true

[project]
name = "azure-cosmos-agent-memory"
version = "0.1.0b2"
version = "0.2.0b1"
description = "Store, retrieve, and transform AI agent memories backed by Azure Cosmos DB"
readme = "README.md"
license = {file = "LICENSE"}
Expand Down
48 changes: 47 additions & 1 deletion tests/unit/aio/store/test_memory_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from azure.cosmos.agent_memory._container_routing import ContainerKey
from azure.cosmos.agent_memory.aio.store import AsyncMemoryStore
from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError
from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError, ValidationError


class AsyncIterator:
Expand Down Expand Up @@ -539,6 +539,52 @@ async def test_search_turns_queries_turns_container():
assert "VectorDistance(c.embedding, @embedding)" in sql


async def test_search_turns_scopes_to_single_partition_with_thread_id():
    """Check that ``user_id`` + ``thread_id`` are forwarded as the partition key.

    The turns container is mocked; the test only inspects the keyword
    arguments passed to ``query_items``.
    """
    turns_container = MagicMock()
    turns_container.query_items = MagicMock(return_value=AsyncIterator([]))
    embedder = MagicMock(generate=AsyncMock(return_value=[0.1, 0.2]))
    store = AsyncMemoryStore(
        containers=_containers(turns=turns_container),
        embeddings_client=embedder,
    )

    await store.search_turns(search_terms="hello", user_id="u1", thread_id="t1")

    # Both identifiers must appear, in order, as the partition key.
    query_kwargs = turns_container.query_items.call_args.kwargs
    assert query_kwargs["partition_key"] == ["u1", "t1"]


async def test_search_turns_fans_out_across_partitions_without_thread_id():
    """Check that no partition key is passed when ``thread_id`` is omitted.

    The turns container is mocked; the test only inspects the keyword
    arguments passed to ``query_items``.
    """
    turns_container = MagicMock()
    turns_container.query_items = MagicMock(return_value=AsyncIterator([]))
    embedder = MagicMock(generate=AsyncMock(return_value=[0.1, 0.2]))
    store = AsyncMemoryStore(
        containers=_containers(turns=turns_container),
        embeddings_client=embedder,
    )

    await store.search_turns(search_terms="hello", user_id="u1")

    # Without a thread_id the call must not be pinned to a partition.
    query_kwargs = turns_container.query_items.call_args.kwargs
    assert "partition_key" not in query_kwargs


async def test_search_turns_requires_user_id():
    """Check that a missing ``user_id`` raises before the container is queried."""
    turns_container = MagicMock()
    embedder = MagicMock(generate=AsyncMock(return_value=[0.1, 0.2]))
    store = AsyncMemoryStore(
        containers=_containers(turns=turns_container),
        embeddings_client=embedder,
    )

    with pytest.raises(ValidationError):
        await store.search_turns(search_terms="hello", user_id=None)

    # Kept outside the ``with`` block so it actually runs after the raise.
    turns_container.query_items.assert_not_called()


async def test_search_does_not_query_turns_container():
turns = MagicMock()
turns.query_items.return_value = AsyncIterator([])
Expand Down
50 changes: 49 additions & 1 deletion tests/unit/store/test_memory_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from azure.cosmos.agent_memory._container_routing import ContainerKey
from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError
from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError, ValidationError
from azure.cosmos.agent_memory.store import MemoryStore


Expand Down Expand Up @@ -539,6 +539,54 @@ def test_search_turns_queries_turns_container():
assert "VectorDistance(c.embedding, @embedding)" in sql


def test_search_turns_scopes_to_single_partition_with_thread_id():
    """Check that ``user_id`` + ``thread_id`` are forwarded as the partition key.

    Also checks that ``enable_cross_partition_query`` is not passed in this
    case. The turns container is mocked; only the ``query_items`` keyword
    arguments are inspected.
    """
    turns_container = MagicMock()
    turns_container.query_items = MagicMock(return_value=[])
    embedder = MagicMock()
    embedder.generate = MagicMock(return_value=[0.1, 0.2])
    store = MemoryStore(
        containers=_containers(turns=turns_container),
        embeddings_client=embedder,
    )

    store.search_turns(search_terms="hello", user_id="u1", thread_id="t1")

    query_kwargs = turns_container.query_items.call_args.kwargs
    assert query_kwargs["partition_key"] == ["u1", "t1"]
    assert "enable_cross_partition_query" not in query_kwargs


def test_search_turns_fans_out_across_partitions_without_thread_id():
    """Check that omitting ``thread_id`` switches to a cross-partition query.

    Expects no ``partition_key`` argument and ``enable_cross_partition_query``
    set to ``True``. The turns container is mocked; only the ``query_items``
    keyword arguments are inspected.
    """
    turns_container = MagicMock()
    turns_container.query_items = MagicMock(return_value=[])
    embedder = MagicMock()
    embedder.generate = MagicMock(return_value=[0.1, 0.2])
    store = MemoryStore(
        containers=_containers(turns=turns_container),
        embeddings_client=embedder,
    )

    store.search_turns(search_terms="hello", user_id="u1")

    query_kwargs = turns_container.query_items.call_args.kwargs
    assert "partition_key" not in query_kwargs
    assert query_kwargs["enable_cross_partition_query"] is True


def test_search_turns_requires_user_id():
    """Check that a missing ``user_id`` raises before the container is queried."""
    turns_container = MagicMock()
    embedder = MagicMock()
    embedder.generate = MagicMock(return_value=[0.1, 0.2])
    store = MemoryStore(
        containers=_containers(turns=turns_container),
        embeddings_client=embedder,
    )

    with pytest.raises(ValidationError):
        store.search_turns(search_terms="hello", user_id=None)

    # Kept outside the ``with`` block so it actually runs after the raise.
    turns_container.query_items.assert_not_called()


def test_search_does_not_query_turns_container():
turns = MagicMock()
turns.query_items.return_value = []
Expand Down
Loading
Loading