diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cc95cc..efc4558 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,24 @@ ## Release History -### Unreleased +## [0.2.0b1] (2026-06-30) + +#### Features Added +* Raw conversation turns can now be embedded and vector-searched. Set + `enable_turn_embeddings=True` (env `ENABLE_TURN_EMBEDDINGS`) to generate an + embedding when each turn is written, then call `search_turns()` (sync and + async, on both the client and store) to semantically search the raw turn + log. See [PR:#22](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/22/) #### Other Changes * The memories container's vector index type is now configurable instead of being hard-coded to `diskANN`. Set it via the `vector_index_type` argument to `create_memory_store(...)` or the `AI_FOUNDRY_EMBEDDING_VECTOR_INDEX_TYPE` - environment variable. Allowed values are `diskANN` (default), `quantizedFlat`, - and `flat`. This lets the toolkit run against Cosmos DB accounts without the - DiskANN capability (for example the classic Cosmos DB emulator), enabling - emulator-backed integration test pipelines. + environment variable. See [PR:#24](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/24) * `ai_foundry_endpoint` now accepts a project-scoped Azure AI Foundry URL (`https://.services.ai.azure.com/api/projects/`) in addition - to the account-level inference endpoint. The project path is automatically - stripped to the inference base, so callers can paste whichever form the - Foundry portal shows them without hitting opaque 404s. + to the account-level inference endpoint. 
See [PR:#23](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/23) -### 0.1.0b2 (2026-06-03) +## [0.1.0b2] (2026-06-03) #### Bugs Fixed * Hardened memory extraction: stops emitting phantom/synthesized facts the user never asserted, stops extracting facts from `[assistant]:` turns, stops re-processing already-extracted turns (which previously produced reversed `CONTRADICT` decisions and meta-facts like `"X is contradicted by Y"`), and stops storing near-duplicate episodic memories for the same scope. Episodic memories also now embed the actual content instead of a boilerplate `"intent recorded"` string. See [PR:#20](https://github.com/AzureCosmosDB/AgentMemoryToolkit/pull/20/) diff --git a/azure.yaml b/azure.yaml index 2543ee8..6a59827 100644 --- a/azure.yaml +++ b/azure.yaml @@ -2,7 +2,7 @@ name: azure-cosmos-agent-memory metadata: - template: azure-cosmos-agent-memory@0.1.0b2 + template: azure-cosmos-agent-memory@0.2.0b1 infra: provider: bicep diff --git a/azure/cosmos/agent_memory/aio/cosmos_memory_client.py b/azure/cosmos/agent_memory/aio/cosmos_memory_client.py index 5d50565..4c5b4a7 100644 --- a/azure/cosmos/agent_memory/aio/cosmos_memory_client.py +++ b/azure/cosmos/agent_memory/aio/cosmos_memory_client.py @@ -709,11 +709,12 @@ async def search_turns( ) -> list[dict[str, Any]]: """Vector-search the raw conversation log (requires turn embeddings). - Searches the turns container directly. Turns are strictly thread-scoped - and only vector-searchable when ``enable_turn_embeddings`` was set when - the turns were written. ``user_id`` is required so the search stays - within a single partition rather than fanning out across every user's - raw turns. + Searches the turns container directly. Turns are only vector-searchable when + ``enable_turn_embeddings`` was set when the turns were written. + ``user_id`` is required and always filters the results. 
Passing + ``thread_id`` as well scopes the search to a single partition; omitting + it fans out across partitions, filtered by ``user_id`` in the WHERE + clause. """ return await self._get_store().search_turns( search_terms=search_terms, diff --git a/azure/cosmos/agent_memory/aio/store/memory_store.py b/azure/cosmos/agent_memory/aio/store/memory_store.py index d13680f..4f725f9 100644 --- a/azure/cosmos/agent_memory/aio/store/memory_store.py +++ b/azure/cosmos/agent_memory/aio/store/memory_store.py @@ -27,6 +27,7 @@ MemoryConflictError, MemoryNotFoundError, MemoryTypeMismatchError, + ValidationError, ) from azure.cosmos.agent_memory.logging import get_logger from azure.cosmos.agent_memory.models import MemoryRecord @@ -864,6 +865,7 @@ async def search( async def search_turns( self, search_terms: Optional[str] = None, + user_id: Optional[str] = None, thread_id: Optional[str] = None, role: Optional[str] = None, hybrid_search: bool = False, @@ -874,16 +876,19 @@ async def search_turns( created_after: Optional[str | datetime] = None, created_before: Optional[str | datetime] = None, *, - user_id: str, query: Optional[str] = None, ) -> list[dict[str, Any]]: """Search raw conversation turns using vector similarity with optional hybrid ranking. - Turns are strictly thread-scoped and only vector-searchable when turn - embeddings were enabled at write time (see ``enable_turn_embeddings``). - ``user_id`` is required so the query is scoped to a single partition - instead of a cross-partition scan over every user's raw turns. + Turns are only vector-searchable when turn embeddings were enabled at write time + (see ``enable_turn_embeddings``). ``user_id`` is required (``ValidationError`` if missing or empty) and always + filters the results. When ``thread_id`` is also supplied the query + targets a single partition; when it is omitted the query fans out + across partitions and is filtered by ``user_id`` in the WHERE clause. 
+ """ + if not user_id: + raise ValidationError("user_id is required for search_turns") terms = require_search_terms(search_terms, query) _validate_hybrid_search(hybrid_search, terms) top = top_literal(top_k, name="top_k") diff --git a/azure/cosmos/agent_memory/cosmos_memory_client.py b/azure/cosmos/agent_memory/cosmos_memory_client.py index 5d960f2..f4924c3 100644 --- a/azure/cosmos/agent_memory/cosmos_memory_client.py +++ b/azure/cosmos/agent_memory/cosmos_memory_client.py @@ -672,11 +672,12 @@ def search_turns( ) -> list[dict[str, Any]]: """Vector-search the raw conversation log (requires turn embeddings). - Searches the turns container directly. Turns are strictly thread-scoped - and only vector-searchable when ``enable_turn_embeddings`` was set when - the turns were written. ``user_id`` is required so the search stays - within a single partition rather than fanning out across every user's - raw turns. + Searches the turns container directly. Turns are only vector-searchable when + ``enable_turn_embeddings`` was set when the turns were written. + ``user_id`` is required and always filters the results. Passing + ``thread_id`` as well scopes the search to a single partition; omitting + it fans out across partitions, filtered by ``user_id`` in the WHERE + clause. 
""" return self._get_store().search_turns( search_terms=search_terms, diff --git a/azure/cosmos/agent_memory/store/memory_store.py b/azure/cosmos/agent_memory/store/memory_store.py index d0b833a..5755311 100644 --- a/azure/cosmos/agent_memory/store/memory_store.py +++ b/azure/cosmos/agent_memory/store/memory_store.py @@ -25,6 +25,7 @@ MemoryConflictError, MemoryNotFoundError, MemoryTypeMismatchError, + ValidationError, ) from azure.cosmos.agent_memory.logging import get_logger from azure.cosmos.agent_memory.models import MemoryRecord @@ -901,6 +902,7 @@ def search( def search_turns( self, search_terms: Optional[str] = None, + user_id: Optional[str] = None, thread_id: Optional[str] = None, role: Optional[str] = None, hybrid_search: bool = False, @@ -911,16 +913,19 @@ def search_turns( created_after: Optional[str | datetime] = None, created_before: Optional[str | datetime] = None, *, - user_id: str, query: Optional[str] = None, ) -> list[dict[str, Any]]: """Search raw conversation turns using vector similarity with optional hybrid ranking. - Turns are strictly thread-scoped and only vector-searchable when turn - embeddings were enabled at write time (see ``enable_turn_embeddings``). - ``user_id`` is required so the query is scoped to a single partition - instead of a cross-partition scan over every user's raw turns. + Turns are only vector-searchable when turn embeddings were enabled at write time + (see ``enable_turn_embeddings``). ``user_id`` is required (``ValidationError`` if missing or empty) and always + filters the results. When ``thread_id`` is also supplied the query + targets a single partition; when it is omitted the query fans out + across partitions and is filtered by ``user_id`` in the WHERE clause. 
+ """ + if not user_id: + raise ValidationError("user_id is required for search_turns") terms = require_search_terms(search_terms, query) _validate_hybrid_search(hybrid_search, terms) top = top_literal(top_k, name="top_k") diff --git a/function_app/requirements.txt b/function_app/requirements.txt index a166c17..96501f6 100644 --- a/function_app/requirements.txt +++ b/function_app/requirements.txt @@ -2,7 +2,7 @@ azure-functions azure-functions-durable -azure-cosmos-agent-memory==0.1.0b2 +azure-cosmos-agent-memory==0.2.0b1 azure-cosmos>=4.16.0 azure-identity>=1.20 diff --git a/infra/modules/cosmos.bicep b/infra/modules/cosmos.bicep index 7f5c07e..f11098e 100644 --- a/infra/modules/cosmos.bicep +++ b/infra/modules/cosmos.bicep @@ -262,9 +262,6 @@ resource memoriesSummariesContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDa } ] excludedPaths: [ - { - path: '/embedding/?' - } { path: '/source_memory_ids/*' } diff --git a/pyproject.toml b/pyproject.toml index 9681f23..3d15411 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ namespaces = true [project] name = "azure-cosmos-agent-memory" -version = "0.1.0b2" +version = "0.2.0b1" description = "Store, retrieve, and transform AI agent memories backed by Azure Cosmos DB" readme = "README.md" license = {file = "LICENSE"} diff --git a/tests/unit/aio/store/test_memory_store.py b/tests/unit/aio/store/test_memory_store.py index 410eb63..2e7940b 100644 --- a/tests/unit/aio/store/test_memory_store.py +++ b/tests/unit/aio/store/test_memory_store.py @@ -7,7 +7,7 @@ from azure.cosmos.agent_memory._container_routing import ContainerKey from azure.cosmos.agent_memory.aio.store import AsyncMemoryStore -from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError +from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError, ValidationError class AsyncIterator: @@ -539,6 +539,52 @@ async def test_search_turns_queries_turns_container(): assert 
"VectorDistance(c.embedding, @embedding)" in sql +async def test_search_turns_scopes_to_single_partition_with_thread_id(): + turns = MagicMock() + turns.query_items.return_value = AsyncIterator([]) + embeddings = MagicMock() + embeddings.generate = AsyncMock(return_value=[0.1, 0.2]) + store = AsyncMemoryStore( + containers=_containers(turns=turns), + embeddings_client=embeddings, + ) + + await store.search_turns(search_terms="hello", user_id="u1", thread_id="t1") + + kwargs = turns.query_items.call_args.kwargs + assert kwargs["partition_key"] == ["u1", "t1"] + + +async def test_search_turns_fans_out_across_partitions_without_thread_id(): + turns = MagicMock() + turns.query_items.return_value = AsyncIterator([]) + embeddings = MagicMock() + embeddings.generate = AsyncMock(return_value=[0.1, 0.2]) + store = AsyncMemoryStore( + containers=_containers(turns=turns), + embeddings_client=embeddings, + ) + + await store.search_turns(search_terms="hello", user_id="u1") + + kwargs = turns.query_items.call_args.kwargs + assert "partition_key" not in kwargs + + +async def test_search_turns_requires_user_id(): + turns = MagicMock() + embeddings = MagicMock() + embeddings.generate = AsyncMock(return_value=[0.1, 0.2]) + store = AsyncMemoryStore( + containers=_containers(turns=turns), + embeddings_client=embeddings, + ) + + with pytest.raises(ValidationError): + await store.search_turns(search_terms="hello", user_id=None) + turns.query_items.assert_not_called() + + async def test_search_does_not_query_turns_container(): turns = MagicMock() turns.query_items.return_value = AsyncIterator([]) diff --git a/tests/unit/store/test_memory_store.py b/tests/unit/store/test_memory_store.py index 3076239..45a5117 100644 --- a/tests/unit/store/test_memory_store.py +++ b/tests/unit/store/test_memory_store.py @@ -6,7 +6,7 @@ import pytest from azure.cosmos.agent_memory._container_routing import ContainerKey -from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, 
MemoryTypeMismatchError +from azure.cosmos.agent_memory.exceptions import MemoryNotFoundError, MemoryTypeMismatchError, ValidationError from azure.cosmos.agent_memory.store import MemoryStore @@ -539,6 +539,54 @@ def test_search_turns_queries_turns_container(): assert "VectorDistance(c.embedding, @embedding)" in sql +def test_search_turns_scopes_to_single_partition_with_thread_id(): + turns = MagicMock() + turns.query_items.return_value = [] + embeddings = MagicMock() + embeddings.generate.return_value = [0.1, 0.2] + store = MemoryStore( + containers=_containers(turns=turns), + embeddings_client=embeddings, + ) + + store.search_turns(search_terms="hello", user_id="u1", thread_id="t1") + + kwargs = turns.query_items.call_args.kwargs + assert kwargs["partition_key"] == ["u1", "t1"] + assert "enable_cross_partition_query" not in kwargs + + +def test_search_turns_fans_out_across_partitions_without_thread_id(): + turns = MagicMock() + turns.query_items.return_value = [] + embeddings = MagicMock() + embeddings.generate.return_value = [0.1, 0.2] + store = MemoryStore( + containers=_containers(turns=turns), + embeddings_client=embeddings, + ) + + store.search_turns(search_terms="hello", user_id="u1") + + kwargs = turns.query_items.call_args.kwargs + assert "partition_key" not in kwargs + assert kwargs["enable_cross_partition_query"] is True + + +def test_search_turns_requires_user_id(): + turns = MagicMock() + embeddings = MagicMock() + embeddings.generate.return_value = [0.1, 0.2] + store = MemoryStore( + containers=_containers(turns=turns), + embeddings_client=embeddings, + ) + + with pytest.raises(ValidationError): + store.search_turns(search_terms="hello", user_id=None) + turns.query_items.assert_not_called() + + def test_search_does_not_query_turns_container(): turns = MagicMock() turns.query_items.return_value = [] diff --git a/uv.lock b/uv.lock index 3192c6b..3dea73f 100644 --- a/uv.lock +++ b/uv.lock @@ -2,50 +2,6 @@ version = 1 revision = 3 requires-python = 
">=3.11" -[[package]] -name = "agent-memory-toolkit" -version = "0.1.0" -source = { editable = "." } -dependencies = [ - { name = "aiohttp" }, - { name = "azure-cosmos" }, - { name = "azure-identity" }, - { name = "jinja2" }, - { name = "openai" }, - { name = "prompty" }, - { name = "pydantic" }, - { name = "typing-extensions" }, -] - -[package.optional-dependencies] -dev = [ - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-cov" }, - { name = "pytest-mock" }, - { name = "python-dotenv" }, - { name = "ruff" }, -] - -[package.metadata] -requires-dist = [ - { name = "aiohttp", specifier = ">=3.10" }, - { name = "azure-cosmos", specifier = ">=4.16.0" }, - { name = "azure-identity", specifier = ">=1.20" }, - { name = "jinja2", specifier = ">=3.1.4" }, - { name = "openai", specifier = ">=1.60" }, - { name = "prompty", specifier = ">=2.0.0a9" }, - { name = "pydantic", specifier = ">=2.10" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, - { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=5.0" }, - { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" }, - { name = "python-dotenv", marker = "extra == 'dev'", specifier = ">=1.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4" }, - { name = "typing-extensions", specifier = ">=4.10" }, -] -provides-extras = ["dev"] - [[package]] name = "aiofiles" version = "25.1.0" @@ -236,6 +192,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a8/11/1af403b5a1d18a4fca2d720239632c46d7dfbd7597711fbd84dd2a6e6872/azure_cosmos-4.16.0-py3-none-any.whl", hash = "sha256:09227ff057c83344798f6faba8b649a13f71582d4acf6e2f5a3ada9c06e2923c", size = 494932, upload-time = "2026-05-29T17:44:16.036Z" }, ] +[[package]] +name = "azure-cosmos-agent-memory" +version = "0.2.0b1" +source = { editable = "." 
} +dependencies = [ + { name = "aiohttp" }, + { name = "azure-cosmos" }, + { name = "azure-identity" }, + { name = "jinja2" }, + { name = "openai" }, + { name = "prompty" }, + { name = "pydantic" }, + { name = "typing-extensions" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-mock" }, + { name = "python-dotenv" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp", specifier = ">=3.10" }, + { name = "azure-cosmos", specifier = ">=4.16.0" }, + { name = "azure-identity", specifier = ">=1.20" }, + { name = "jinja2", specifier = ">=3.1.4" }, + { name = "openai", specifier = ">=1.60" }, + { name = "prompty", specifier = ">=2.0.0a9" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=5.0" }, + { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" }, + { name = "python-dotenv", marker = "extra == 'dev'", specifier = ">=1.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4" }, + { name = "typing-extensions", specifier = ">=4.10" }, +] +provides-extras = ["dev"] + [[package]] name = "azure-identity" version = "1.25.3"