diff --git a/pixi.lock b/pixi.lock index 956415aff2a..e6e724dce74 100644 --- a/pixi.lock +++ b/pixi.lock @@ -394,7 +394,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl @@ -523,7 +523,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl @@ -652,7 +652,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl @@ -793,7 +793,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl @@ -915,7 +915,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl @@ -1037,7 +1037,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl @@ -1226,7 +1226,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl @@ -1399,7 +1399,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl @@ -1572,7 +1572,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl @@ -2229,8 +2229,8 @@ packages: requires_python: '>=3.10' - pypi: ./ name: feast - version: 0.62.1.dev54+g1b4e643d1 - sha256: ec65cdae435224e7c42d59c5ba9e8a0ba4d8319f0c7a62a9b89f431a17d4cf8a + version: 0.62.1.dev58+g4f142a3c1.d20260501 + sha256: 168dda185ac1fd0b97d388a4c5af598965dc17dfaaf09d35c7bb4ddbd3dafe43 requires_dist: - click>=7.0.0,<9.0.0 - colorama>=0.3.9,<1 @@ -2341,7 +2341,7 @@ packages: - pytest-benchmark>=3.4.1,<4 ; extra == 'test' - pytest-asyncio<=0.24.0 ; extra == 'test' - py>=1.11.0 ; extra == 'test' - - testcontainers==4.9.0 ; extra == 'test' + - testcontainers>=4.15.0rc2 ; extra == 'test' - minio==7.2.11 ; extra == 'test' - python-keycloak==4.2.2 ; extra == 'test' - cryptography>=43.0 ; extra == 'test' @@ -6861,50 +6861,62 @@ packages: - tornado>=4.5 ; extra == 'test' - typeguard ; extra == 'test' requires_python: '>=3.8' -- pypi: https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl +- pypi: https://files.pythonhosted.org/packages/fa/a6/5833ae272ae79dceeea58b6c7381c47cbcbd0113d0d0b04da8ae1ac45e48/testcontainers-4.15.0rc2-py3-none-any.whl name: testcontainers - version: 4.9.0 - sha256: c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32 - requires_dist: - - azure-cosmos ; extra == 'cosmosdb' - - azure-storage-blob>=12.19,<13.0 ; extra == 'azurite' - - bcrypt ; extra == 'registry' - - boto3 ; extra == 'aws' or extra == 'localstack' - - cassandra-driver==3.29.1 ; extra == 'scylla' - - chromadb-client ; extra == 'chroma' - - clickhouse-driver ; extra == 'clickhouse' - - cryptography ; extra == 'mailpit' or extra == 'sftp' + version: 4.15.0rc2 + sha256: e55b9045842c5bdfdd295e0d0b09aeafb3c1fb9d6f30bd8e718df8fd48dcdc41 + requires_dist: - docker - - google-cloud-datastore>=2 ; extra == 'google' - - google-cloud-pubsub>=2 ; extra == 'google' - - httpx ; extra == 'aws' or extra == 'generic' or extra == 'test-module-import' - - ibm-db-sa ; extra == 'db2' - - influxdb ; extra == 'influxdb' - - influxdb-client ; extra == 'influxdb' - - kubernetes ; extra == 'k3s' - - minio ; extra == 'minio' - - nats-py ; extra == 'nats' - - neo4j ; extra == 'neo4j' - - opensearch-py ; extra == 'opensearch' - - oracledb ; extra == 'oracle' or extra == 'oracle-free' - - pika ; extra == 'rabbitmq' - - pymongo ; extra == 'mongodb' - - pymssql ; extra == 'mssql' - - pymysql[rsa] ; extra == 'mysql' - - python-arango>=7.8,<8.0 ; extra == 'arangodb' - python-dotenv - - python-keycloak ; extra == 'keycloak' - - pyyaml ; extra == 'k3s' - - qdrant-client ; extra == 'qdrant' - - redis ; extra == 'generic' or extra == 'redis' - - selenium ; extra == 'selenium' - - sqlalchemy ; extra == 'db2' or extra == 'mssql' or extra == 'mysql' or extra == 'oracle' or extra == 'oracle-free' - - trino ; extra == 'trino' - typing-extensions - urllib3 - - weaviate-client>=4.5.4,<5.0.0 ; extra == 'weaviate' - wrapt - requires_python: '>=3.9,<4.0' + - python-arango>=8 ; extra == 'arangodb' + - boto3>=1 ; extra == 'aws' + - httpx ; extra == 'aws' + - azure-storage-blob>=12 ; extra == 'azurite' + - chromadb-client>=1 ; extra == 'chroma' + - clickhouse-driver ; extra == 'clickhouse' + - azure-cosmos>=4 ; extra == 'cosmosdb' + - ibm-db-sa ; platform_machine != 'aarch64' and platform_machine != 'arm64' and extra == 'db2' + - sqlalchemy>=2 ; extra == 'db2' + - httpx ; extra == 'generic' + - redis>=7 ; extra == 'generic' + - sqlalchemy ; extra == 'generic' + - google-cloud-datastore>=2 ; extra == 'google' + - google-cloud-pubsub>=2 ; extra == 'google' + - influxdb-client>=1 ; extra == 'influxdb' + - influxdb>=5 ; extra == 'influxdb' + - kubernetes ; extra == 'k3s' + - pyyaml>=6.0.3 ; extra == 'k3s' + - python-keycloak>=6 ; python_full_version < '4' and extra == 'keycloak' + - boto3>=1 ; extra == 'localstack' + - cryptography ; extra == 'mailpit' + - minio>=7 ; extra == 'minio' + - pymongo>=4 ; extra == 'mongodb' + - pymssql>=2 ; extra == 'mssql' + - sqlalchemy>=2 ; extra == 'mssql' + - pymysql[rsa]>=1 ; extra == 'mysql' + - sqlalchemy>=2 ; extra == 'mysql' + - nats-py>=2 ; extra == 'nats' + - neo4j>=6 ; extra == 'neo4j' + - openfga-sdk ; extra == 'openfga' + - opensearch-py>=3 ; python_full_version < '4' and extra == 'opensearch' + - oracledb>=3 ; extra == 'oracle' + - sqlalchemy>=2 ; extra == 'oracle' + - oracledb>=3 ; extra == 'oracle-free' + - sqlalchemy>=2 ; extra == 'oracle-free' + - qdrant-client>=1 ; extra == 'qdrant' + - pika>=1 ; extra == 'rabbitmq' + - redis>=7 ; extra == 'redis' + - bcrypt>=5 ; extra == 'registry' + - cassandra-driver>=3 ; python_full_version < '3.14' and extra == 'scylla' + - selenium>=4 ; extra == 'selenium' + - cryptography ; extra == 'sftp' + - httpx ; extra == 'test-module-import' + - trino ; extra == 'trino' + - weaviate-client>=4 ; extra == 'weaviate' + requires_python: '>=3.10' - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda sha256: cafeec44494f842ffeca27e9c8b0c27ed714f93ac77ddadc6aaf726b5554ebac md5: cffd3bdd58090148f4cfcd831f4b26ab diff --git a/pyproject.toml b/pyproject.toml index fb3415bd93d..a7b6b79ae33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,7 +146,7 @@ test = [ "pytest-benchmark>=3.4.1,<4", "pytest-asyncio<=0.24.0", "py>=1.11.0", - "testcontainers==4.9.0", + "testcontainers>=4.15.0rc2", "minio==7.2.11", "python-keycloak==4.2.2", "cryptography>=43.0", diff --git a/sdk/python/feast/infra/online_stores/mongodb_online_store/mongodb.py b/sdk/python/feast/infra/online_stores/mongodb_online_store/mongodb.py index 3e7a3db84c8..d0105607d43 100644 --- a/sdk/python/feast/infra/online_stores/mongodb_online_store/mongodb.py +++ b/sdk/python/feast/infra/online_stores/mongodb_online_store/mongodb.py @@ -1,28 +1,33 @@ from __future__ import annotations +import time from datetime import datetime from logging import getLogger -from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union try: from pymongo import AsyncMongoClient, MongoClient, UpdateOne from pymongo.asynchronous.collection import AsyncCollection from pymongo.collection import Collection from pymongo.driver_info import DriverInfo + from pymongo.operations import SearchIndexModel except ImportError as e: from feast.errors import FeastExtrasDependencyImportError raise FeastExtrasDependencyImportError("mongodb", str(e)) import feast.version +from feast.batch_feature_view import BatchFeatureView from feast.entity import Entity from feast.feature_view import FeatureView -from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.key_encoding_utils import deserialize_entity_key, serialize_entity_key from feast.infra.online_stores.online_store import OnlineStore +from feast.infra.online_stores.vector_store import VectorStoreConfig from feast.infra.supported_async_methods import SupportedAsyncMethods from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.stream_feature_view import StreamFeatureView from feast.type_map import ( feast_value_type_to_python_type, python_values_to_proto_values, @@ -33,7 +38,7 @@ DRIVER_METADATA = DriverInfo(name="Feast", version=feast.version.get_version()) -class MongoDBOnlineStoreConfig(FeastConfigBaseModel): +class MongoDBOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig): """MongoDB configuration. For a description of kwargs that may be passed to MongoClient, @@ -48,6 +53,11 @@ class MongoDBOnlineStoreConfig(FeastConfigBaseModel): ) collection_suffix: str = "latest" client_kwargs: Dict[str, Any] = {} + # vector_enabled and similarity are inherited from VectorStoreConfig + vector_index_wait_timeout: int = 60 + """Seconds to wait for a newly created Atlas Search index to become READY.""" + vector_index_wait_poll_interval: float = 1.0 + """Seconds between polls when waiting for an Atlas Search index to become READY.""" class MongoDBOnlineStore(OnlineStore): @@ -213,11 +223,135 @@ def online_read( return self._convert_raw_docs_to_proto(ids, docs, table) + def retrieve_online_documents_v2( + self, + config: RepoConfig, + table: FeatureView, + requested_features: List[str], + embedding: Optional[List[float]], + top_k: int, + distance_metric: Optional[str] = None, + query_string: Optional[str] = None, + include_feature_view_version_metadata: bool = False, + ) -> List[ + Tuple[ + Optional[datetime], + Optional[EntityKeyProto], + Optional[Dict[str, ValueProto]], + ] + ]: + """Retrieve documents via MongoDB Atlas Vector Search ($vectorSearch). + + Uses the ``$vectorSearch`` aggregation stage to find the *top_k* + documents closest to the provided *embedding* vector. The method + expects that an Atlas vector search index has already been created for + the relevant field (see ``update()``). + + Returns a list of 3-tuples ``(event_timestamp, entity_key_proto, + feature_dict)`` where *feature_dict* includes the requested feature + values plus a synthetic ``distance`` key with the vector search score. + """ + if not config.online_store.vector_enabled: + raise ValueError( + "Vector search is not enabled in the online store config. " + "Set vector_enabled=True in MongoDBOnlineStoreConfig." + ) + if embedding is None: + raise ValueError( + "An embedding vector must be provided for MongoDB vector search." + ) + + clxn = self._get_collection(config) + + # Identify the vector field on this feature view + vector_fields = [f for f in table.features if f.vector_index] + if not vector_fields: + raise ValueError( + f"Feature view '{table.name}' has no fields with vector_index=True." + ) + vector_field = vector_fields[0] + path = f"features.{table.name}.{vector_field.name}" + idx_name = self._vector_search_index_name(table.name, vector_field.name) + + # BSON cannot encode numpy float types — ensure native Python floats. + query_vector = [float(v) for v in embedding] + + num_candidates = max(top_k * 10, 100) + pipeline: List[Dict[str, Any]] = [ + { + "$vectorSearch": { + "index": idx_name, + "path": path, + "queryVector": query_vector, + "numCandidates": num_candidates, + "limit": top_k, + } + }, + { + "$addFields": { + "score": {"$meta": "vectorSearchScore"}, + } + }, + ] + + results: List[ + Tuple[ + Optional[datetime], + Optional[EntityKeyProto], + Optional[Dict[str, ValueProto]], + ] + ] = [] + + for doc in clxn.aggregate(pipeline): + # Deserialize entity key + entity_key_bin = doc.get("_id") + entity_key_proto = ( + deserialize_entity_key( + entity_key_bin, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + if entity_key_bin + else None + ) + + # Event timestamp + event_ts = doc.get("event_timestamps", {}).get(table.name) + + # Build feature dict from raw doc values + fv_features = doc.get("features", {}).get(table.name, {}) + + # Convert raw values → ValueProto for each requested feature + feature_dict: Dict[str, ValueProto] = {} + feature_type_map = {f.name: f.dtype.to_value_type() for f in table.features} + + for feat_name in requested_features: + raw_val = fv_features.get(feat_name) + if raw_val is not None: + vtype = feature_type_map.get(feat_name) + if vtype is not None: + protos = python_values_to_proto_values( + [raw_val], feature_type=vtype + ) + feature_dict[feat_name] = protos[0] + else: + # Fall back: try to store as-is + feature_dict[feat_name] = _python_value_to_proto(raw_val) + + # Add distance (vector search score) + score = doc.get("score", 0.0) + feature_dict["distance"] = ValueProto(float_val=float(score)) + + results.append((event_ts, entity_key_proto, feature_dict)) + + return results + def update( self, config: RepoConfig, tables_to_delete: Sequence[FeatureView], - tables_to_keep: Sequence[FeatureView], + tables_to_keep: Sequence[ + Union[BatchFeatureView, StreamFeatureView, FeatureView] + ], entities_to_delete: Sequence[Entity], entities_to_keep: Sequence[Entity], partial: bool, @@ -238,20 +372,32 @@ def update( } We remove any feature views named in tables_to_delete. The Entities are serialized in the _id. No schema needs be adjusted. + + When ``vector_enabled`` is set in the online store config, Atlas Vector + Search indexes are automatically created for feature views containing + fields with ``vector_index=True`` and dropped for deleted feature views. """ if not isinstance(config.online_store, MongoDBOnlineStoreConfig): raise RuntimeError(f"{config.online_store.type = }. It must be mongodb.") + online_config = config.online_store clxn = self._get_collection(repo_config=config) + # --- Remove deleted feature views (data + vector search indexes) --- if tables_to_delete: unset_fields = {} for fv in tables_to_delete: unset_fields[f"features.{fv.name}"] = "" unset_fields[f"event_timestamps.{fv.name}"] = "" - clxn.update_many({}, {"$unset": unset_fields}) + if online_config.vector_enabled: + self._drop_vector_indexes_for_tables(clxn, tables_to_delete) + + # --- Create vector search indexes for kept feature views --- + if online_config.vector_enabled: + self._ensure_vector_indexes(clxn, tables_to_keep, online_config) + # Note: entities_to_delete contains Entity definitions (metadata), not entity instances. # Like other online stores, we don't need to do anything with entities_to_delete here. @@ -286,6 +432,136 @@ async def close(self) -> None: # Helpers # ------------------------------------------------------------------ + # -- Vector Search helpers ------------------------------------------ + + @staticmethod + def _vector_search_index_name(fv_name: str, field_name: str) -> str: + """Canonical Atlas vector search index name for a (feature_view, field) pair.""" + return f"{fv_name}__{field_name}__vs_index" + + @staticmethod + def _vector_search_index_definition( + path: str, + num_dimensions: int, + similarity: str, + ) -> dict: + """Return a vector search index definition for ``SearchIndexModel``.""" + return { + "fields": [ + { + "type": "vector", + "path": path, + "numDimensions": num_dimensions, + "similarity": similarity, + } + ] + } + + @staticmethod + def _wait_for_index_ready( + collection: Collection, + index_name: str, + timeout: int, + poll_interval: float = 1.0, + ) -> None: + """Poll until the named Atlas Search index reaches READY status. + + Raises ``TimeoutError`` if the index does not become queryable + within *timeout* seconds. + """ + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + for idx in collection.list_search_indexes(name=index_name): + if idx.get("status") == "READY" or idx.get("queryable") is True: + return + time.sleep(poll_interval) + raise TimeoutError( + f"Atlas search index '{index_name}' did not reach READY " + f"within {timeout}s. Increase vector_index_wait_timeout in " + f"MongoDBOnlineStoreConfig if the index needs more time." + ) + + def _drop_vector_indexes_for_tables( + self, + collection: Collection, + tables: Sequence[FeatureView], + ) -> None: + """Drop all Atlas vector search indexes belonging to the given feature views.""" + existing = {idx["name"] for idx in collection.list_search_indexes()} + for fv in tables: + for field in fv.features: + idx_name = self._vector_search_index_name(fv.name, field.name) + if idx_name in existing: + logger.info("Dropping vector search index: %s", idx_name) + collection.drop_search_index(idx_name) + + def _ensure_vector_indexes( + self, + collection: Collection, + tables: Sequence[Union[BatchFeatureView, StreamFeatureView, FeatureView]], + online_config: MongoDBOnlineStoreConfig, + ) -> None: + """Create Atlas vector search indexes for vector-indexed fields if they don't exist. + + Currently creates one index per (feature_view, vector_field) pair. + A future optimization could consolidate all vector fields into a + single composite index with multiple field definitions, reducing + cluster-wide index count and memory overhead. See Atlas limits: + hard cap of 2,500 search indexes per cluster; smaller tiers (M10) + may degrade well before that. + """ + db = collection.database + if collection.name not in db.list_collection_names(): + db.create_collection(collection.name) + + existing = {idx["name"] for idx in collection.list_search_indexes()} + + for fv in tables: + vector_fields = [f for f in fv.features if f.vector_index] + for field in vector_fields: + idx_name = self._vector_search_index_name(fv.name, field.name) + if idx_name in existing: + logger.debug("Vector search index '%s' already exists", idx_name) + continue + + path = f"features.{fv.name}.{field.name}" + num_dimensions = field.vector_length + if not num_dimensions: + raise ValueError( + f"Field '{field.name}' in feature view '{fv.name}' has " + f"vector_index=True but vector_length is not set." + ) + + similarity = ( + field.vector_search_metric or online_config.similarity or "cosine" + ) + + definition = self._vector_search_index_definition( + path, num_dimensions, similarity + ) + search_index_model = SearchIndexModel( + definition=definition, + name=idx_name, + type="vectorSearch", + ) + logger.info( + "Creating Atlas vector search index '%s' on path '%s' " + "(dims=%d, similarity=%s)", + idx_name, + path, + num_dimensions, + similarity, + ) + collection.create_search_index(model=search_index_model) + self._wait_for_index_ready( + collection, + idx_name, + online_config.vector_index_wait_timeout, + online_config.vector_index_wait_poll_interval, + ) + + # -- Connection helpers --------------------------------------------- + def _get_client(self, config: RepoConfig): """Returns a connection to the server.""" online_store_config = config.online_store @@ -494,5 +770,20 @@ async def online_write_batch_async( progress(len(data)) -# TODO -# - Vector Search (requires atlas image in testcontainers or similar) +def _python_value_to_proto(value: Any) -> ValueProto: + """Best-effort conversion of a single Python value to ValueProto.""" + if isinstance(value, float): + return ValueProto(float_val=value) + elif isinstance(value, bool): + return ValueProto(bool_val=value) + elif isinstance(value, int): + return ValueProto(int64_val=value) + elif isinstance(value, str): + return ValueProto(string_val=value) + elif isinstance(value, bytes): + return ValueProto(bytes_val=value) + elif isinstance(value, list) and all(isinstance(v, float) for v in value): + proto = ValueProto() + proto.float_list_val.val.extend(value) + return proto + return ValueProto() diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index bc80d3f5a1c..6b9913159f1 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -4145,9 +4145,9 @@ pymssql==2.3.2 \ --hash=sha256:fb8a7b197aaf466a7577ca6690aa9d747081b653ab212d052d71f3cc10587c3b \ --hash=sha256:fdd774b26407babd0205ef85a098f90553e6b3da77a22322a1e7d2cb51f742c0 # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyodbc==5.3.0 \ --hash=sha256:01166162149adf2b8a6dc21a212718f205cabbbdff4047dc0c415af3fd85867e \ @@ -5504,9 +5504,9 @@ terminado==0.18.1 \ # via # jupyter-server # jupyter-server-terminals -testcontainers==4.9.0 \ - --hash=sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e \ - --hash=sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32 +testcontainers==4.15.0rc2 \ + --hash=sha256:4764016e73da0fa960eb8360687d22710cd68bcc01a4d03189fbe1da896a805d \ + --hash=sha256:e55b9045842c5bdfdd295e0d0b09aeafb3c1fb9d6f30bd8e718df8fd48dcdc41 # via feast (pyproject.toml) threadpoolctl==3.6.0 \ --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ diff --git a/sdk/python/requirements/py3.10-minimal-requirements.txt b/sdk/python/requirements/py3.10-minimal-requirements.txt index adbeb0b99a5..bc2c15b8eac 100644 --- a/sdk/python/requirements/py3.10-minimal-requirements.txt +++ b/sdk/python/requirements/py3.10-minimal-requirements.txt @@ -2238,9 +2238,9 @@ pymilvus==2.5.18 \ --hash=sha256:1b78badcfa8d62db7d0b29193fc0422e4676873ff1c745a9d75c2c885d7a7e32 \ --hash=sha256:9e517076068e98dac51c018bc0dfe1f651d936154e2e2d9ad6c7b3dab1164e2d # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyopenssl==26.1.0 \ --hash=sha256:115563879b2c8ccb207975705d3e491434d8c9d7c79667c902ecbf5f3bbd2ece \ diff --git a/sdk/python/requirements/py3.10-minimal-sdist-requirements.txt b/sdk/python/requirements/py3.10-minimal-sdist-requirements.txt index 8628e0f3db3..7248630c0c1 100644 --- a/sdk/python/requirements/py3.10-minimal-sdist-requirements.txt +++ b/sdk/python/requirements/py3.10-minimal-sdist-requirements.txt @@ -2439,9 +2439,9 @@ pymilvus==2.5.18 \ --hash=sha256:1b78badcfa8d62db7d0b29193fc0422e4676873ff1c745a9d75c2c885d7a7e32 \ --hash=sha256:9e517076068e98dac51c018bc0dfe1f651d936154e2e2d9ad6c7b3dab1164e2d # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyopenssl==26.1.0 \ --hash=sha256:115563879b2c8ccb207975705d3e491434d8c9d7c79667c902ecbf5f3bbd2ece \ diff --git a/sdk/python/requirements/py3.11-ci-requirements.txt b/sdk/python/requirements/py3.11-ci-requirements.txt index c3deba0cd19..d6d91e0f371 100644 --- a/sdk/python/requirements/py3.11-ci-requirements.txt +++ b/sdk/python/requirements/py3.11-ci-requirements.txt @@ -4318,9 +4318,9 @@ pymssql==2.3.2 \ --hash=sha256:fb8a7b197aaf466a7577ca6690aa9d747081b653ab212d052d71f3cc10587c3b \ --hash=sha256:fdd774b26407babd0205ef85a098f90553e6b3da77a22322a1e7d2cb51f742c0 # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pynacl==1.6.2 \ --hash=sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c \ @@ -5758,9 +5758,9 @@ terminado==0.18.1 \ # via # jupyter-server # jupyter-server-terminals -testcontainers==4.9.0 \ - --hash=sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e \ - --hash=sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32 +testcontainers==4.15.0rc2 \ + --hash=sha256:4764016e73da0fa960eb8360687d22710cd68bcc01a4d03189fbe1da896a805d \ + --hash=sha256:e55b9045842c5bdfdd295e0d0b09aeafb3c1fb9d6f30bd8e718df8fd48dcdc41 # via feast (pyproject.toml) threadpoolctl==3.6.0 \ --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ diff --git a/sdk/python/requirements/py3.11-minimal-requirements.txt b/sdk/python/requirements/py3.11-minimal-requirements.txt index 5252773a1f9..48a46a21175 100644 --- a/sdk/python/requirements/py3.11-minimal-requirements.txt +++ b/sdk/python/requirements/py3.11-minimal-requirements.txt @@ -2249,9 +2249,9 @@ pymilvus==2.5.18 \ --hash=sha256:1b78badcfa8d62db7d0b29193fc0422e4676873ff1c745a9d75c2c885d7a7e32 \ --hash=sha256:9e517076068e98dac51c018bc0dfe1f651d936154e2e2d9ad6c7b3dab1164e2d # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyopenssl==26.1.0 \ --hash=sha256:115563879b2c8ccb207975705d3e491434d8c9d7c79667c902ecbf5f3bbd2ece \ diff --git a/sdk/python/requirements/py3.11-minimal-sdist-requirements.txt b/sdk/python/requirements/py3.11-minimal-sdist-requirements.txt index 96cf50e9a5f..84fa82236dc 100644 --- a/sdk/python/requirements/py3.11-minimal-sdist-requirements.txt +++ b/sdk/python/requirements/py3.11-minimal-sdist-requirements.txt @@ -2448,9 +2448,9 @@ pymilvus==2.5.18 \ --hash=sha256:1b78badcfa8d62db7d0b29193fc0422e4676873ff1c745a9d75c2c885d7a7e32 \ --hash=sha256:9e517076068e98dac51c018bc0dfe1f651d936154e2e2d9ad6c7b3dab1164e2d # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyopenssl==26.1.0 \ --hash=sha256:115563879b2c8ccb207975705d3e491434d8c9d7c79667c902ecbf5f3bbd2ece \ diff --git a/sdk/python/requirements/py3.12-ci-requirements.txt b/sdk/python/requirements/py3.12-ci-requirements.txt index 6306d3b76a3..fd43a7b70a9 100644 --- a/sdk/python/requirements/py3.12-ci-requirements.txt +++ b/sdk/python/requirements/py3.12-ci-requirements.txt @@ -4308,9 +4308,9 @@ pymssql==2.3.2 \ --hash=sha256:fb8a7b197aaf466a7577ca6690aa9d747081b653ab212d052d71f3cc10587c3b \ --hash=sha256:fdd774b26407babd0205ef85a098f90553e6b3da77a22322a1e7d2cb51f742c0 # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pynacl==1.6.2 \ --hash=sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c \ @@ -5748,9 +5748,9 @@ terminado==0.18.1 \ # via # jupyter-server # jupyter-server-terminals -testcontainers==4.9.0 \ - --hash=sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e \ - --hash=sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32 +testcontainers==4.15.0rc2 \ + --hash=sha256:4764016e73da0fa960eb8360687d22710cd68bcc01a4d03189fbe1da896a805d \ + --hash=sha256:e55b9045842c5bdfdd295e0d0b09aeafb3c1fb9d6f30bd8e718df8fd48dcdc41 # via feast (pyproject.toml) threadpoolctl==3.6.0 \ --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ diff --git a/sdk/python/requirements/py3.12-minimal-requirements.txt b/sdk/python/requirements/py3.12-minimal-requirements.txt index faf1359c285..d06134cb36e 100644 --- a/sdk/python/requirements/py3.12-minimal-requirements.txt +++ b/sdk/python/requirements/py3.12-minimal-requirements.txt @@ -2241,9 +2241,9 @@ pymilvus==2.5.18 \ --hash=sha256:1b78badcfa8d62db7d0b29193fc0422e4676873ff1c745a9d75c2c885d7a7e32 \ --hash=sha256:9e517076068e98dac51c018bc0dfe1f651d936154e2e2d9ad6c7b3dab1164e2d # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyopenssl==26.1.0 \ --hash=sha256:115563879b2c8ccb207975705d3e491434d8c9d7c79667c902ecbf5f3bbd2ece \ diff --git a/sdk/python/requirements/py3.12-minimal-sdist-requirements.txt b/sdk/python/requirements/py3.12-minimal-sdist-requirements.txt index 1b2f57db7a2..c5231d0fd24 100644 --- a/sdk/python/requirements/py3.12-minimal-sdist-requirements.txt +++ b/sdk/python/requirements/py3.12-minimal-sdist-requirements.txt @@ -2440,9 +2440,9 @@ pymilvus==2.5.18 \ --hash=sha256:1b78badcfa8d62db7d0b29193fc0422e4676873ff1c745a9d75c2c885d7a7e32 \ --hash=sha256:9e517076068e98dac51c018bc0dfe1f651d936154e2e2d9ad6c7b3dab1164e2d # via feast (pyproject.toml) -pymysql==1.1.2 \ - --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ - --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 +pymysql==1.1.3 \ + --hash=sha256:8164ba62c552f6105f3b11753352d0f16b90d1703ba67d81923d5a8a5d1c5289 \ + --hash=sha256:e70ebf2047a4edf6138cf79c68ad418ef620af65900aa585c5e8bfc95044d43a # via feast (pyproject.toml) pyopenssl==26.1.0 \ --hash=sha256:115563879b2c8ccb207975705d3e491434d8c9d7c79667c902ecbf5f3bbd2ece \ diff --git a/sdk/python/tests/integration/conftest.py b/sdk/python/tests/integration/conftest.py index 5784ad30292..f4f70d31ec3 100644 --- a/sdk/python/tests/integration/conftest.py +++ b/sdk/python/tests/integration/conftest.py @@ -134,7 +134,7 @@ def start_keycloak_server(): @pytest.fixture(scope="session") def mysql_server(): - container = MySqlContainer("mysql:latest") + container = MySqlContainer("mysql:latest", dialect="pymysql") container.start() yield container diff --git a/sdk/python/tests/integration/online_store/test_mongodb_vector_search.py b/sdk/python/tests/integration/online_store/test_mongodb_vector_search.py new file mode 100644 index 00000000000..1a0892aed99 --- /dev/null +++ b/sdk/python/tests/integration/online_store/test_mongodb_vector_search.py @@ -0,0 +1,280 @@ +"""Integration tests for MongoDB Atlas Vector Search in MongoDBOnlineStore. + +These tests require Docker and the ``mongodb/mongodb-atlas-local:8.0.4`` image. +They exercise: + - Vector index creation during ``update()`` + - Write + retrieve round-trip with known embeddings + - ``top_k`` limiting + - Index cleanup on teardown +""" + +from __future__ import annotations + +import time +from datetime import datetime, timedelta +from typing import Any, Dict, List + +import numpy as np +import pytest + +from feast import Entity, FeatureView, RepoConfig +from feast.field import Field +from feast.infra.offline_stores.file_source import FileSource +from feast.infra.online_stores.mongodb_online_store.mongodb import ( + MongoDBOnlineStore, +) +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.types import Array, Float32, Int64, String +from feast.value_type import ValueType +from tests.universal.feature_repos.universal.online_store.mongodb import ( + MongoDBAtlasOnlineStoreCreator, +) + +VECTOR_DIM = 3 +NUM_ROWS = 5 +INDEX_WAIT = 5 # seconds to wait for Atlas Search index eventual consistency + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _build_write_batch( + write_data: List[Dict[str, Any]], feature_view: FeatureView +) -> list: + """Build a list of (entity_key, features, event_ts, created_ts) tuples.""" + batch = [] + now = datetime.utcnow() + for row in write_data: + entity_key = EntityKeyProto( + join_keys=["item_id"], + entity_values=[ValueProto(int64_val=row["item_id"])], + ) + features: Dict[str, ValueProto] = { + "title": ValueProto(string_val=row["title"]), + } + emb_proto = ValueProto() + emb_proto.float_list_val.val.extend(row["embedding"]) + features["embedding"] = emb_proto + batch.append((entity_key, features, now, now)) + return batch + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def atlas_creator(): + """Start an Atlas-local container for the whole module.""" + creator = MongoDBAtlasOnlineStoreCreator(project_name="test_vs") + online_store_dict = creator.create_online_store() + yield online_store_dict, creator + creator.teardown() + + +@pytest.fixture(scope="module") +def repo_config(atlas_creator) -> RepoConfig: + online_store_dict, _ = atlas_creator + return RepoConfig( + project="test_vs", + provider="local", + online_store=online_store_dict, + registry="memory://", + entity_key_serialization_version=3, + ) + + +@pytest.fixture(scope="module") +def item_entity() -> Entity: + return Entity(name="item_id", join_keys=["item_id"], value_type=ValueType.INT64) + + +@pytest.fixture(scope="module") +def feature_view() -> FeatureView: + data_source = FileSource( + path="dummy_path.parquet", timestamp_field="event_timestamp" + ) + return FeatureView( + name="item_embeddings", + entities=[ + Entity( + name="item_id", + join_keys=["item_id"], + value_type=ValueType.INT64, + ) + ], + schema=[ + Field( + name="embedding", + dtype=Array(Float32), + vector_index=True, + vector_length=VECTOR_DIM, + vector_search_metric="cosine", + ), + Field(name="title", dtype=String), + Field(name="item_id", dtype=Int64), + ], + source=data_source, + ttl=timedelta(hours=24), + online=True, + ) + + +@pytest.fixture(scope="module") +def write_data() -> List[Dict[str, Any]]: + """Deterministic embeddings so we know the expected ordering.""" + np.random.seed(42) + rows = [] + for i in range(1, NUM_ROWS + 1): + rows.append( + { + "item_id": i, + "embedding": list(np.random.rand(VECTOR_DIM).astype(np.float32)), + "title": f"Document {i}", + } + ) + return rows + + +@pytest.fixture(scope="module") +def store() -> MongoDBOnlineStore: + """A single store instance shared across the module.""" + return MongoDBOnlineStore() + + +@pytest.fixture(scope="module") +def _setup_index_and_data(store, repo_config, feature_view, item_entity, write_data): + """One-time setup: create index, write data. Runs once for the module.""" + store.update( + config=repo_config, + tables_to_delete=[], + tables_to_keep=[feature_view], + entities_to_delete=[], + entities_to_keep=[item_entity], + partial=False, + ) + batch = _build_write_batch(write_data, feature_view) + store.online_write_batch( + config=repo_config, + table=feature_view, + data=batch, + progress=None, + ) + # Atlas Search indexes are eventually consistent — give the index time + # to pick up the newly written documents before running queries. + time.sleep(INDEX_WAIT) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.integration +class TestMongoDBVectorSearch: + """Tests for MongoDB Atlas Vector Search integration.""" + + def test_vector_index_created_on_update( + self, store, repo_config, feature_view, item_entity, _setup_index_and_data + ): + """Verify that update() creates an Atlas vector search index.""" + clxn = store._get_collection(repo_config) + indexes = list(clxn.list_search_indexes()) + vs_index_names = [idx["name"] for idx in indexes] + expected = "item_embeddings__embedding__vs_index" + assert expected in vs_index_names, ( + f"Expected index '{expected}' not found. Got: {vs_index_names}" + ) + + def test_write_and_retrieve_round_trip( + self, store, repo_config, feature_view, write_data, _setup_index_and_data + ): + """Write embeddings then retrieve via vector search.""" + query_embedding = write_data[0]["embedding"] + results = store.retrieve_online_documents_v2( + config=repo_config, + table=feature_view, + requested_features=["embedding", "title"], + embedding=query_embedding, + top_k=3, + ) + assert len(results) == 3 + # Each result is (event_ts, entity_key_proto, feature_dict) + for ts, ek, fdict in results: + assert fdict is not None + assert "distance" in fdict + assert "title" in fdict + assert fdict["distance"].float_val >= 0 + + # The closest match should be the document itself (highest score) + _, _, best = results[0] + assert best["title"].string_val == "Document 1" + + def test_top_k_limiting( + self, store, repo_config, feature_view, write_data, _setup_index_and_data + ): + """Verify that top_k correctly limits the number of results.""" + query_embedding = write_data[0]["embedding"] + + results_2 = store.retrieve_online_documents_v2( + config=repo_config, + table=feature_view, + requested_features=["embedding", "title"], + embedding=query_embedding, + top_k=2, + ) + assert len(results_2) == 2 + + results_all = store.retrieve_online_documents_v2( + config=repo_config, + table=feature_view, + requested_features=["embedding", "title"], + embedding=query_embedding, + top_k=100, + ) + assert len(results_all) == NUM_ROWS + + def test_update_idempotent( + self, store, repo_config, feature_view, item_entity, _setup_index_and_data + ): + """Calling update() a second time should not error (index already exists).""" + # _setup_index_and_data already called update() once. + # A second call must be a no-op for the existing index. + store.update( + config=repo_config, + tables_to_delete=[], + tables_to_keep=[feature_view], + entities_to_delete=[], + entities_to_keep=[item_entity], + partial=False, + ) + clxn = store._get_collection(repo_config) + indexes = list(clxn.list_search_indexes()) + vs_index_names = [idx["name"] for idx in indexes] + expected = "item_embeddings__embedding__vs_index" + assert vs_index_names.count(expected) == 1, ( + f"Expected exactly one '{expected}' index, got: {vs_index_names}" + ) + + def test_index_cleanup_on_teardown( + self, store, repo_config, feature_view, item_entity, _setup_index_and_data + ): + """Verify teardown drops the collection (and thus all indexes). + + This test must run last as it destroys the collection. + """ + store.teardown( + config=repo_config, + tables=[feature_view], + entities=[item_entity], + ) + # After teardown the collection should be dropped + client = store._get_client(repo_config) + online_config = repo_config.online_store + db = client[online_config.database_name] + clxn_name = f"{repo_config.project}_{online_config.collection_suffix}" + assert clxn_name not in db.list_collection_names() diff --git a/sdk/python/tests/integration/online_store/test_mysql_versioning.py b/sdk/python/tests/integration/online_store/test_mysql_versioning.py index d1d132681c5..d7353cd4a08 100644 --- a/sdk/python/tests/integration/online_store/test_mysql_versioning.py +++ b/sdk/python/tests/integration/online_store/test_mysql_versioning.py @@ -77,6 +77,7 @@ def setup_mysql(self): username="root", password="testpass", # pragma: allowlist secret dbname="feast", + dialect="pymysql", ).with_exposed_ports(3306) self.container.start() self.port = self.container.get_exposed_port(3306) diff --git a/sdk/python/tests/unit/online_store/test_mongodb_online_retrieval.py b/sdk/python/tests/unit/online_store/test_mongodb_online_retrieval.py index cfadc3151fd..029746cf442 100644 --- a/sdk/python/tests/unit/online_store/test_mongodb_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_mongodb_online_retrieval.py @@ -11,13 +11,13 @@ pytest.importorskip("pymongo") -from feast import FeatureView, Field, FileSource # noqa: E402 +from feast import Entity, FeatureView, Field, FileSource, RepoConfig # noqa: E402 from feast.infra.online_stores.mongodb_online_store.mongodb import ( # noqa: E402 MongoDBOnlineStore, ) from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto -from feast.types import Int64 +from feast.types import Array, Float32, Int64, String from feast.utils import _utc_now from tests.universal.feature_repos.universal.feature_views import TAGS from tests.utils.cli_repo_creator import CliRunner, get_example_repo @@ -303,3 +303,118 @@ def test_convert_raw_docs_ordering(): assert results[0][1]["score"].int64_val == 1 # entity_z assert results[1][1]["score"].int64_val == 2 # entity_a assert results[2][1]["score"].int64_val == 3 # entity_m + + +# --------------------------------------------------------------------------- +# Vector search validation — pure Python, no Docker required +# --------------------------------------------------------------------------- + + +def _make_vector_fv( + *, vector_index: bool = True, vector_length: int = 3 +) -> FeatureView: + """Build a FeatureView with an embedding field for vector search tests.""" + embedding_kwargs: dict = {"name": "embedding", "dtype": Array(Float32)} + if vector_index: + embedding_kwargs.update( + vector_index=True, + vector_length=vector_length, + vector_search_metric="cosine", + ) + return FeatureView( + name="item_embeddings", + entities=[Entity(name="item_id", join_keys=["item_id"])], + schema=[ + Field(**embedding_kwargs), + Field(name="title", dtype=String), + Field(name="item_id", dtype=Int64), + ], + source=FileSource(path="fake.parquet", timestamp_field="event_timestamp"), + ttl=timedelta(hours=24), + ) + + +def _make_repo_config(*, vector_enabled: bool = True) -> RepoConfig: + """Build a RepoConfig with MongoDB online store for vector search tests.""" + return RepoConfig( + project="test_vs", + provider="local", + online_store={ + "type": "mongodb", + "connection_string": "mongodb://localhost:27017", + "vector_enabled": vector_enabled, + }, + registry="memory://", + entity_key_serialization_version=3, + ) + + +def test_retrieve_raises_when_vector_not_enabled(): + """retrieve_online_documents_v2 raises ValueError when vector_enabled=False.""" + store = MongoDBOnlineStore() + config = _make_repo_config(vector_enabled=False) + fv = _make_vector_fv() + + with pytest.raises(ValueError, match="Vector search is not enabled"): + store.retrieve_online_documents_v2( + config=config, + table=fv, + requested_features=["embedding"], + embedding=[1.0, 0.0, 0.0], + top_k=3, + ) + + +def test_retrieve_raises_when_embedding_is_none(): + """retrieve_online_documents_v2 raises ValueError when embedding is None.""" + store = MongoDBOnlineStore() + config = _make_repo_config(vector_enabled=True) + fv = _make_vector_fv() + + with pytest.raises(ValueError, match="embedding vector must be provided"): + store.retrieve_online_documents_v2( + config=config, + table=fv, + requested_features=["embedding"], + embedding=None, + top_k=3, + ) + + +def test_retrieve_raises_when_no_vector_fields(): + """retrieve_online_documents_v2 raises ValueError when no fields have vector_index=True.""" + store = MongoDBOnlineStore() + config = _make_repo_config(vector_enabled=True) + fv = _make_vector_fv(vector_index=False) + + with pytest.raises(ValueError, match="has no fields with vector_index=True"): + store.retrieve_online_documents_v2( + config=config, + table=fv, + requested_features=["embedding"], + embedding=[1.0, 0.0, 0.0], + top_k=3, + ) + + +def test_ensure_vector_indexes_raises_on_missing_vector_length(): + """_ensure_vector_indexes raises ValueError when vector_length is not set.""" + from unittest.mock import MagicMock + + store = MongoDBOnlineStore() + config = _make_repo_config(vector_enabled=True) + online_config = config.online_store + + # Create a feature view where vector_index=True but vector_length=0/None + fv = _make_vector_fv(vector_index=True, vector_length=0) + + # Mock collection so we don't need a real MongoDB connection + mock_collection = MagicMock() + mock_collection.database.list_collection_names.return_value = ["test_vs_latest"] + mock_collection.name = "test_vs_latest" + mock_collection.list_search_indexes.return_value = [] + + with pytest.raises( + ValueError, match="vector_index=True but vector_length is not set" + ): + store._ensure_vector_indexes(mock_collection, [fv], online_config) diff --git a/sdk/python/tests/universal/feature_repos/universal/online_store/mongodb.py b/sdk/python/tests/universal/feature_repos/universal/online_store/mongodb.py index 0c0afd4908a..a3791446232 100644 --- a/sdk/python/tests/universal/feature_repos/universal/online_store/mongodb.py +++ b/sdk/python/tests/universal/feature_repos/universal/online_store/mongodb.py @@ -1,6 +1,6 @@ from typing import Any, Dict -from testcontainers.mongodb import MongoDbContainer +from testcontainers.mongodb import MongoDBAtlasLocalContainer, MongoDbContainer from tests.universal.feature_repos.universal.online_store_creator import ( OnlineStoreCreator, @@ -29,3 +29,29 @@ def create_online_store(self) -> Dict[str, Any]: def teardown(self): self.container.stop() + + +class MongoDBAtlasOnlineStoreCreator(OnlineStoreCreator): + """OnlineStoreCreator backed by ``MongoDBAtlasLocalContainer``. + + This uses the ``mongodb/mongodb-atlas-local`` Docker image which provides + a local Atlas deployment with support for Atlas Search and Vector Search. + """ + + def __init__(self, project_name: str, **kwargs): + super().__init__(project_name) + self.container = MongoDBAtlasLocalContainer( + "mongodb/mongodb-atlas-local:8.0.4", + ) + + def create_online_store(self) -> Dict[str, Any]: + self.container.start() + connection_string = self.container.get_connection_url() + return { + "type": "mongodb", + "connection_string": connection_string, + "vector_enabled": True, + } + + def teardown(self): + self.container.stop() diff --git a/sdk/python/tests/universal/feature_repos/universal/online_store/mysql.py b/sdk/python/tests/universal/feature_repos/universal/online_store/mysql.py index c1ebdf6c984..fa6978b259a 100644 --- a/sdk/python/tests/universal/feature_repos/universal/online_store/mysql.py +++ b/sdk/python/tests/universal/feature_repos/universal/online_store/mysql.py @@ -11,7 +11,7 @@ class MySQLOnlineStoreCreator(OnlineStoreCreator): def __init__(self, project_name: str, **kwargs): super().__init__(project_name) self.container = ( - MySqlContainer("mysql:latest", platform="linux/amd64") + MySqlContainer("mysql:latest", platform="linux/amd64", dialect="pymysql") .with_exposed_ports(3306) .with_env("MYSQL_USER", "root") .with_env("MYSQL_PASSWORD", "test") @@ -37,7 +37,7 @@ class BatchWriteMySQLOnlineStoreCreator(OnlineStoreCreator): def __init__(self, project_name: str, **kwargs): super().__init__(project_name) self.container = ( - MySqlContainer("mysql:latest", platform="linux/amd64") + MySqlContainer("mysql:latest", platform="linux/amd64", dialect="pymysql") .with_exposed_ports(3306) .with_env("MYSQL_USER", "root") .with_env("MYSQL_PASSWORD", "test")