Add table quantization type

lucagiac81 · alwayslove2013 · commit 7bda989feb1f · 2025-03-07T10:18:52.000+08:00
diff --git a/README.md b/README.md
@@ -131,7 +131,11 @@ Options:
   --ef-construction INTEGER       hnsw ef-construction
   --ef-search INTEGER             hnsw ef-search
   --quantization-type [none|bit|halfvec]
-                                  quantization type for vectors
+                                  quantization type for vectors (in index)
+  --table-quantization-type [none|bit|halfvec]
+                                  quantization type for vectors (in table). If
+                                  equal to bit, the parameter
+                                  quantization_type will be set to bit too.
   --custom-case-name TEXT         Custom case name i.e. PerformanceCase1536D50K
   --custom-case-description TEXT  Custom name description
   --custom-case-load-timeout INTEGER
diff --git a/vectordb_bench/backend/clients/pgvector/cli.py b/vectordb_bench/backend/clients/pgvector/cli.py
@@ -82,7 +82,17 @@ class PgVectorTypedDict(CommonTypedDict):
         click.option(
             "--quantization-type",
             type=click.Choice(["none", "bit", "halfvec"]),
-            help="quantization type for vectors",
+            help="quantization type for vectors (in index)",
+            required=False,
+        ),
+    ]
+    table_quantization_type: Annotated[
+        str | None,
+        click.option(
+            "--table-quantization-type",
+            type=click.Choice(["none", "bit", "halfvec"]),
+            help="quantization type for vectors (in table). "
+            "If equal to bit, the parameter quantization_type will be set to bit too.",
             required=False,
         ),
     ]
@@ -146,6 +156,7 @@ def PgVectorIVFFlat(
             lists=parameters["lists"],
             probes=parameters["probes"],
             quantization_type=parameters["quantization_type"],
+            table_quantization_type=parameters["table_quantization_type"],
             reranking=parameters["reranking"],
             reranking_metric=parameters["reranking_metric"],
             quantized_fetch_limit=parameters["quantized_fetch_limit"],
@@ -182,6 +193,7 @@ def PgVectorHNSW(
             maintenance_work_mem=parameters["maintenance_work_mem"],
             max_parallel_workers=parameters["max_parallel_workers"],
             quantization_type=parameters["quantization_type"],
+            table_quantization_type=parameters["table_quantization_type"],
             reranking=parameters["reranking"],
             reranking_metric=parameters["reranking_metric"],
             quantized_fetch_limit=parameters["quantized_fetch_limit"],
diff --git a/vectordb_bench/backend/clients/pgvector/config.py b/vectordb_bench/backend/clients/pgvector/config.py
@@ -80,7 +80,12 @@ def parse_metric(self) -> str:
 
         if d.get(self.quantization_type) is None:
             return d.get("_fallback").get(self.metric_type)
-        return d.get(self.quantization_type).get(self.metric_type)
+        metric = d.get(self.quantization_type).get(self.metric_type)
+        # If using binary quantization for the index, use a bit metric
+        # no matter what metric was selected for vector or halfvec data
+        if self.quantization_type == "bit" and metric is None:
+            return "bit_hamming_ops"
+        return metric
 
     def parse_metric_fun_op(self) -> LiteralString:
         if self.quantization_type == "bit":
@@ -168,21 +173,27 @@ class PgVectorIVFFlatConfig(PgVectorIndexConfig):
     maintenance_work_mem: str | None = None
     max_parallel_workers: int | None = None
     quantization_type: str | None = None
+    table_quantization_type: str | None
     reranking: bool | None = None
     quantized_fetch_limit: int | None = None
     reranking_metric: str | None = None
 
     def index_param(self) -> PgVectorIndexParam:
         index_parameters = {"lists": self.lists}
-        if self.quantization_type == "none":
-            self.quantization_type = None
+        if self.quantization_type == "none" or self.quantization_type is None:
+            self.quantization_type = "vector"
+        if self.table_quantization_type == "none" or self.table_quantization_type is None:
+            self.table_quantization_type = "vector"
+        if self.table_quantization_type == "bit":
+            self.quantization_type = "bit"
         return {
             "metric": self.parse_metric(),
             "index_type": self.index.value,
             "index_creation_with_options": self._optionally_build_with_options(index_parameters),
             "maintenance_work_mem": self.maintenance_work_mem,
             "max_parallel_workers": self.max_parallel_workers,
             "quantization_type": self.quantization_type,
+            "table_quantization_type": self.table_quantization_type,
         }
 
     def search_param(self) -> PgVectorSearchParam:
@@ -212,21 +223,27 @@ class PgVectorHNSWConfig(PgVectorIndexConfig):
     maintenance_work_mem: str | None = None
     max_parallel_workers: int | None = None
     quantization_type: str | None = None
+    table_quantization_type: str | None
     reranking: bool | None = None
     quantized_fetch_limit: int | None = None
     reranking_metric: str | None = None
 
     def index_param(self) -> PgVectorIndexParam:
         index_parameters = {"m": self.m, "ef_construction": self.ef_construction}
-        if self.quantization_type == "none":
-            self.quantization_type = None
+        if self.quantization_type == "none" or self.quantization_type is None:
+            self.quantization_type = "vector"
+        if self.table_quantization_type == "none" or self.table_quantization_type is None:
+            self.table_quantization_type = "vector"
+        if self.table_quantization_type == "bit":
+            self.quantization_type = "bit"
         return {
             "metric": self.parse_metric(),
             "index_type": self.index.value,
             "index_creation_with_options": self._optionally_build_with_options(index_parameters),
             "maintenance_work_mem": self.maintenance_work_mem,
             "max_parallel_workers": self.max_parallel_workers,
             "quantization_type": self.quantization_type,
+            "table_quantization_type": self.table_quantization_type,
         }
 
     def search_param(self) -> PgVectorSearchParam:
diff --git a/vectordb_bench/backend/clients/pgvector/pgvector.py b/vectordb_bench/backend/clients/pgvector/pgvector.py
@@ -94,7 +94,7 @@ def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
         reranking = self.case_config.search_param()["reranking"]
         column_name = (
             sql.SQL("binary_quantize({0})").format(sql.Identifier("embedding"))
-            if index_param["quantization_type"] == "bit"
+            if index_param["quantization_type"] == "bit" and index_param["table_quantization_type"] != "bit"
             else sql.SQL("embedding")
         )
         search_vector = (
@@ -104,7 +104,8 @@ def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
         )
 
         # The following sections assume that the quantization_type value matches the quantization function name
-        if index_param["quantization_type"] is not None:
+        if index_param["quantization_type"] != index_param["table_quantization_type"]:
+            # Reranking makes sense only if table quantization is not "bit"
             if index_param["quantization_type"] == "bit" and reranking:
                 # Embeddings needs to be passed to binary_quantize function if quantization_type is bit
                 search_query = sql.Composed(
@@ -113,7 +114,7 @@ def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
                             """
                             SELECT i.id
                             FROM (
-                                SELECT id, embedding {reranking_metric_fun_op} %s::vector AS distance
+                                SELECT id, embedding {reranking_metric_fun_op} %s::{table_quantization_type} AS distance
                                 FROM public.{table_name} {where_clause}
                                 ORDER BY {column_name}::{quantization_type}({dim})
                             """,
@@ -123,21 +124,25 @@ def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
                             reranking_metric_fun_op=sql.SQL(
                                 self.case_config.search_param()["reranking_metric_fun_op"],
                             ),
+                            search_vector=search_vector,
+                            table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
                             quantization_type=sql.SQL(index_param["quantization_type"]),
                             dim=sql.Literal(self.dim),
                             where_clause=sql.SQL("WHERE id >= %s") if filtered else sql.SQL(""),
                         ),
                         sql.SQL(self.case_config.search_param()["metric_fun_op"]),
                         sql.SQL(
                             """
-                                {search_vector}
+                                {search_vector}::{quantization_type}({dim})
                                 LIMIT {quantized_fetch_limit}
                             ) i
                             ORDER BY i.distance
                             LIMIT %s::int
                             """,
                         ).format(
                             search_vector=search_vector,
+                            quantization_type=sql.SQL(index_param["quantization_type"]),
+                            dim=sql.Literal(self.dim),
                             quantized_fetch_limit=sql.Literal(
                                 self.case_config.search_param()["quantized_fetch_limit"],
                             ),
@@ -160,10 +165,12 @@ def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
                             where_clause=sql.SQL("WHERE id >= %s") if filtered else sql.SQL(""),
                         ),
                         sql.SQL(self.case_config.search_param()["metric_fun_op"]),
-                        sql.SQL(" {search_vector} LIMIT %s::int").format(
+                        sql.SQL(" {search_vector}::{quantization_type}({dim}) LIMIT %s::int").format(
                             search_vector=search_vector,
+                            quantization_type=sql.SQL(index_param["quantization_type"]),
+                            dim=sql.Literal(self.dim),
                         ),
-                    ],
+                    ]
                 )
         else:
             search_query = sql.Composed(
@@ -175,8 +182,12 @@ def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
                         where_clause=sql.SQL("WHERE id >= %s") if filtered else sql.SQL(""),
                     ),
                     sql.SQL(self.case_config.search_param()["metric_fun_op"]),
-                    sql.SQL(" %s::vector LIMIT %s::int"),
-                ],
+                    sql.SQL(" {search_vector}::{quantization_type}({dim}) LIMIT %s::int").format(
+                        search_vector=search_vector,
+                        quantization_type=sql.SQL(index_param["quantization_type"]),
+                        dim=sql.Literal(self.dim),
+                    ),
+                ]
             )
 
         return search_query
@@ -323,7 +334,7 @@ def _create_index(self):
                 )
         with_clause = sql.SQL("WITH ({});").format(sql.SQL(", ").join(options)) if any(options) else sql.Composed(())
 
-        if index_param["quantization_type"] is not None:
+        if index_param["quantization_type"] != index_param["table_quantization_type"]:
             index_create_sql = sql.SQL(
                 """
                 CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name}
@@ -365,14 +376,23 @@ def _create_table(self, dim: int):
         assert self.conn is not None, "Connection is not initialized"
         assert self.cursor is not None, "Cursor is not initialized"
 
+        index_param = self.case_config.index_param()
+
         try:
             log.info(f"{self.name} client create table : {self.table_name}")
 
             # create table
             self.cursor.execute(
                 sql.SQL(
-                    "CREATE TABLE IF NOT EXISTS public.{table_name} (id BIGINT PRIMARY KEY, embedding vector({dim}));",
-                ).format(table_name=sql.Identifier(self.table_name), dim=dim),
+                    """
+                    CREATE TABLE IF NOT EXISTS public.{table_name}
+                    (id BIGINT PRIMARY KEY, embedding {table_quantization_type}({dim}));
+                    """
+                ).format(
+                    table_name=sql.Identifier(self.table_name),
+                    table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
+                    dim=dim,
+                )
             )
             self.cursor.execute(
                 sql.SQL(
@@ -393,18 +413,41 @@ def insert_embeddings(
         assert self.conn is not None, "Connection is not initialized"
         assert self.cursor is not None, "Cursor is not initialized"
 
+        index_param = self.case_config.index_param()
+
         try:
             metadata_arr = np.array(metadata)
             embeddings_arr = np.array(embeddings)
 
-            with self.cursor.copy(
-                sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT BINARY)").format(
-                    table_name=sql.Identifier(self.table_name),
-                ),
-            ) as copy:
-                copy.set_types(["bigint", "vector"])
-                for i, row in enumerate(metadata_arr):
-                    copy.write_row((row, embeddings_arr[i]))
+            if index_param["table_quantization_type"] == "bit":
+                with self.cursor.copy(
+                    sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT TEXT)").format(
+                        table_name=sql.Identifier(self.table_name)
+                    )
+                ) as copy:
+                    # Same logic as pgvector binary_quantize
+                    for i, row in enumerate(metadata_arr):
+                        embeddings_bit = ""
+                        for embedding in embeddings_arr[i]:
+                            if embedding > 0:
+                                embeddings_bit += "1"
+                            else:
+                                embeddings_bit += "0"
+                        copy.write_row((str(row), embeddings_bit))
+            else:
+                with self.cursor.copy(
+                    sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT BINARY)").format(
+                        table_name=sql.Identifier(self.table_name)
+                    )
+                ) as copy:
+                    if index_param["table_quantization_type"] == "halfvec":
+                        copy.set_types(["bigint", "halfvec"])
+                        for i, row in enumerate(metadata_arr):
+                            copy.write_row((row, np.float16(embeddings_arr[i])))
+                    else:
+                        copy.set_types(["bigint", "vector"])
+                        for i, row in enumerate(metadata_arr):
+                            copy.write_row((row, embeddings_arr[i]))
             self.conn.commit()
 
             if kwargs.get("last_batch"):
diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -823,6 +823,19 @@ class CaseConfigInput(BaseModel):
     ],
 )
 
+CaseConfigParamInput_TableQuantizationType_PgVector = CaseConfigInput(
+    label=CaseConfigParamType.tableQuantizationType,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["none", "bit", "halfvec"],
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+    in [
+        IndexType.HNSW.value,
+        IndexType.IVFFlat.value,
+    ],
+)
+
 CaseConfigParamInput_max_parallel_workers_PgVectorRS = CaseConfigInput(
     label=CaseConfigParamType.max_parallel_workers,
     displayLabel="Max parallel workers",
@@ -1138,6 +1151,7 @@ class CaseConfigInput(BaseModel):
     CaseConfigParamInput_m,
     CaseConfigParamInput_EFConstruction_PgVector,
     CaseConfigParamInput_QuantizationType_PgVector,
+    CaseConfigParamInput_TableQuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
 ]
@@ -1149,6 +1163,7 @@ class CaseConfigInput(BaseModel):
     CaseConfigParamInput_Lists_PgVector,
     CaseConfigParamInput_Probes_PgVector,
     CaseConfigParamInput_QuantizationType_PgVector,
+    CaseConfigParamInput_TableQuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
     CaseConfigParamInput_reranking_PgVector,
diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py
@@ -49,6 +49,7 @@ class CaseConfigParamType(Enum):
     probes = "probes"
     quantizationType = "quantization_type"
     quantizationRatio = "quantization_ratio"
+    tableQuantizationType = "table_quantization_type"
     reranking = "reranking"
     rerankingMetric = "reranking_metric"
     quantizedFetchLimit = "quantized_fetch_limit"