Skip to content

Commit 2627540

Browse files
committed
feat: add VectorChord client implementation and configuration 🎉
- Introduced VectorChord client with support for embedding operations. - Added configuration classes for VectorChord settings and parameters.
1 parent b7dc7d6 commit 2627540

4 files changed

Lines changed: 578 additions & 0 deletions

File tree

‎vectordb_bench/backend/clients/vectorchord/__init__.py‎

Whitespace-only changes.
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import os
2+
from typing import Annotated, Unpack
3+
4+
import click
5+
from pydantic import SecretStr
6+
7+
from vectordb_bench.backend.clients import DB
8+
9+
from ....cli.cli import (
10+
CommonTypedDict,
11+
cli,
12+
click_parameter_decorators_from_typed_dict,
13+
run,
14+
)
15+
16+
17+
class VectorChordTypedDict(CommonTypedDict):
    """Connection-level CLI options for a VectorChord (Postgres) target.

    Extends ``CommonTypedDict``. Each field pairs its Python type with the
    ``click.option`` that supplies the value on the command line.
    """

    # Login role; must always be supplied explicitly.
    user_name: Annotated[
        str,
        click.option("--user-name", required=True, type=str, help="Db username"),
    ]
    # When the flag is omitted the value comes from $POSTGRES_PASSWORD, or is
    # an empty string if that variable is unset. The default is a callable so
    # the environment is read when the default is needed, not at import time.
    password: Annotated[
        str,
        click.option(
            "--password",
            default=lambda: os.getenv("POSTGRES_PASSWORD", ""),
            show_default="$POSTGRES_PASSWORD",
            type=str,
            help="Postgres database password",
        ),
    ]

    # Server address; required.
    host: Annotated[
        str,
        click.option("--host", required=True, type=str, help="Db host"),
    ]
    # Optional TCP port, 5432 unless overridden.
    port: Annotated[
        int,
        click.option(
            "--port",
            required=False,
            default=5432,
            show_default=True,
            type=int,
            help="Postgres database port",
        ),
    ]
    # Database to connect to; required.
    db_name: Annotated[
        str,
        click.option("--db-name", required=True, type=str, help="Db name"),
    ]
46+
47+
48+
class VectorChordRQTypedDict(VectorChordTypedDict):
    """CLI options for the vchordrq index, on top of the connection options.

    Options declared without a ``default`` have no explicit default here and
    are typed ``int | None``.
    """

    # --- index build options -------------------------------------------------
    lists: Annotated[
        int | None,
        click.option(
            "--lists",
            help="Number of IVF lists for vchordrq index",
            type=int,
        ),
    ]
    # --- search options with explicit defaults -------------------------------
    probes: Annotated[
        int | None,
        click.option(
            "--probes",
            default=10,
            show_default=True,
            help="Number of probes during search",
            type=int,
        ),
    ]
    epsilon: Annotated[
        float | None,
        click.option(
            "--epsilon",
            default=1.9,
            show_default=True,
            help="Reranking precision factor (0.0-4.0, higher is more accurate but slower)",
            type=float,
        ),
    ]
    # --- on/off switches, both off by default --------------------------------
    residual_quantization: Annotated[
        bool,
        click.option(
            "--residual-quantization/--no-residual-quantization",
            default=False,
            show_default=True,
            help="Enable residual quantization for improved accuracy",
            type=bool,
        ),
    ]
    spherical_centroids: Annotated[
        bool,
        click.option(
            "--spherical-centroids/--no-spherical-centroids",
            default=False,
            show_default=True,
            help="L2-normalize centroids during K-means (recommended for cosine/IP)",
            type=bool,
        ),
    ]
    # --- optional tuning knobs -----------------------------------------------
    build_threads: Annotated[
        int | None,
        click.option(
            "--build-threads",
            help="Number of threads for index building (range: 1-255)",
            type=int,
        ),
    ]
    degree_of_parallelism: Annotated[
        int | None,
        click.option(
            "--degree-of-parallelism",
            help="Degree of parallelism for index build (range: 1-256, default: 32)",
            type=int,
        ),
    ]
    max_scan_tuples: Annotated[
        int | None,
        click.option(
            "--max-scan-tuples",
            help="Max tuples to scan before stopping (-1 for unlimited)",
            type=int,
        ),
    ]
    max_parallel_workers: Annotated[
        int | None,
        click.option(
            "--max-parallel-workers",
            help="Sets the maximum number of parallel workers for index creation",
            type=int,
        ),
    ]
129+
130+
131+
# CLI entry point for benchmarking VectorChord with a vchordrq index.
# Documented with comments rather than a docstring: click displays a command's
# docstring as its help text, and this command deliberately has none.
@cli.command()
@click_parameter_decorators_from_typed_dict(VectorChordRQTypedDict)
def VectorChordRQ(
    **parameters: Unpack[VectorChordRQTypedDict],
):
    # Imported here rather than at module level, so the config module is only
    # loaded when this command actually runs.
    from .config import VectorChordConfig, VectorChordRQConfig

    # Connection settings; user name and password are wrapped in SecretStr.
    connection = VectorChordConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
        db_name=parameters["db_name"],
    )

    # Index/search settings are copied from the parsed options under the same names.
    case_keys = (
        "lists",
        "probes",
        "epsilon",
        "residual_quantization",
        "spherical_centroids",
        "build_threads",
        "degree_of_parallelism",
        "max_scan_tuples",
        "max_parallel_workers",
    )
    index_settings = VectorChordRQConfig(**{key: parameters[key] for key in case_keys})

    # The full option mapping is forwarded as well, alongside the two configs.
    run(
        db=DB.VectorChord,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
from abc import abstractmethod
2+
from typing import LiteralString, TypedDict
3+
4+
from pydantic import BaseModel, SecretStr
5+
6+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
7+
8+
9+
class VectorChordConfigDict(TypedDict):
    """Connection arguments in the shape psycopg expects.

    The keys are passed straight through as keyword arguments when the
    psycopg connection is created, so each name has to match the psycopg
    API exactly.
    """

    # Credentials.
    user: str
    password: str
    # Server location.
    host: str
    port: int
    # Target database.
    dbname: str
18+
19+
20+
# Connection settings for a VectorChord database.
# User name and password are held as SecretStr; to_dict() unwraps them.
class VectorChordConfig(DBConfig):
    user_name: SecretStr = SecretStr("postgres")
    password: SecretStr  # no default: must always be provided
    host: str = "localhost"
    port: int = 5432
    db_name: str = "vectordb"

    def to_dict(self) -> VectorChordConfigDict:
        """Return the connection settings as a plain dict.

        The user name and password are returned as their underlying plain
        strings, and the keys follow ``VectorChordConfigDict``.
        """
        return VectorChordConfigDict(
            host=self.host,
            port=self.port,
            dbname=self.db_name,
            user=self.user_name.get_secret_value(),
            password=self.password.get_secret_value(),
        )
37+
38+
39+
class VectorChordIndexConfig(BaseModel, DBCaseConfig):
40+
metric_type: MetricType | None = None
41+
create_index_before_load: bool = False
42+
create_index_after_load: bool = True
43+
44+
def parse_metric(self) -> str:
45+
if self.metric_type == MetricType.L2:
46+
return "vector_l2_ops"
47+
if self.metric_type == MetricType.IP:
48+
return "vector_ip_ops"
49+
return "vector_cosine_ops"
50+
51+
def parse_metric_fun_op(self) -> LiteralString:
52+
if self.metric_type == MetricType.L2:
53+
return "<->"
54+
if self.metric_type == MetricType.IP:
55+
return "<#>"
56+
return "<=>"
57+
58+
@abstractmethod
59+
def index_param(self) -> dict: ...
60+
61+
@abstractmethod
62+
def search_param(self) -> dict: ...
63+
64+
@abstractmethod
65+
def session_param(self) -> dict: ...
66+
67+
68+
# Configuration for the vchordrq index: build options, one PostgreSQL tuning
# value, and search-time settings emitted as "vchordrq.*" session parameters.
class VectorChordRQConfig(VectorChordIndexConfig):
    index: IndexType = IndexType.VCHORDRQ
    # Build parameters written at the top level of the options text
    residual_quantization: bool = False
    degree_of_parallelism: int | None = None  # default 32, range [1, 256]
    # Build parameters written under the [build.internal] section
    lists: int | None = None
    spherical_centroids: bool = False
    build_threads: int | None = None  # range [1, 255]
    # PostgreSQL tuning parameter
    max_parallel_workers: int | None = None  # sets max_parallel_workers & max_parallel_maintenance_workers
    # Search parameters (GUCs)
    probes: int | None = 10
    epsilon: float | None = 1.9  # range [0.0, 4.0]
    max_scan_tuples: int | None = None  # default -1, range [-1, 2147483647]

    def index_param(self) -> dict:
        """Return the index-creation parameters.

        The ``options`` entry is a newline-joined text block: top-level
        settings first, then a ``[build.internal]`` header (always present)
        followed by its settings. Boolean settings appear only when enabled
        and optional ones only when they are not ``None``.
        """
        # Each pair is (include this line?, line text), in output order.
        candidate_lines = (
            (self.residual_quantization, "residual_quantization = true"),
            (self.degree_of_parallelism is not None, f"degree_of_parallelism = {self.degree_of_parallelism}"),
            (True, "[build.internal]"),
            (self.lists is not None, f"lists = [{self.lists}]"),
            (self.spherical_centroids, "spherical_centroids = true"),
            (self.build_threads is not None, f"build_threads = {self.build_threads}"),
        )
        options_text = "\n".join(text for wanted, text in candidate_lines if wanted)

        return {
            "metric": self.parse_metric(),
            "index_type": self.index.value,
            "options": options_text,
            "max_parallel_workers": self.max_parallel_workers,
        }

    def search_param(self) -> dict:
        """Return the search parameters: just the metric's operator token."""
        operator_token = self.parse_metric_fun_op()
        return {"metric_fun_op": operator_token}

    def session_param(self) -> dict:
        """Return the ``vchordrq.*`` session settings as strings.

        Settings whose value is ``None`` are left out entirely.
        """
        settings = {
            "vchordrq.probes": self.probes,
            "vchordrq.epsilon": self.epsilon,
            "vchordrq.max_scan_tuples": self.max_scan_tuples,
        }
        return {name: str(value) for name, value in settings.items() if value is not None}
119+
120+
121+
# Lookup table from index type to the case-config class that handles it.
# Only the vchordrq index is registered.
_vectorchord_case_config: dict[IndexType, type[VectorChordIndexConfig]] = {
    IndexType.VCHORDRQ: VectorChordRQConfig,
}

0 commit comments

Comments
 (0)