From b7c0d131b61edca56c647576afdfc6aab623314b Mon Sep 17 00:00:00 2001 From: p3rf Team Date: Tue, 12 May 2026 07:58:32 -0700 Subject: [PATCH] Add internal BigQuery ODBC benchmarking wrapper. PiperOrigin-RevId: 914284727 --- perfkitbenchmarker/providers/gcp/bigquery.py | 57 ++++++++++++++++++++ perfkitbenchmarker/providers/gcp/flags.py | 1 + 2 files changed, 58 insertions(+) diff --git a/perfkitbenchmarker/providers/gcp/bigquery.py b/perfkitbenchmarker/providers/gcp/bigquery.py index a4f9b71627..e26fe7618a 100644 --- a/perfkitbenchmarker/providers/gcp/bigquery.py +++ b/perfkitbenchmarker/providers/gcp/bigquery.py @@ -57,6 +57,10 @@ 'GOOGLE_JDBC': '--add-opens=java.base/java.nio=ALL-UNNAMED', } +BQ_ODBC_INTERFACES = ['GOOGLE_ODBC'] +BQ_ODBC_CLIENT_FILE = 'odbc-client' + + class GenericClientInterface(edw_service.EdwClientInterface): """Generic Client Interface class for BigQuery. @@ -99,6 +103,8 @@ def GetBigQueryClientInterface( return JavaClientInterface(project_id, dataset_id) if gcp_flags.BQ_CLIENT_INTERFACE.value in BQ_JDBC_INTERFACES: return JdbcClientInterface(project_id, dataset_id) + if gcp_flags.BQ_CLIENT_INTERFACE.value in BQ_ODBC_INTERFACES: + return OdbcClientInterface(project_id, dataset_id) if gcp_flags.BQ_CLIENT_INTERFACE.value == 'PYTHON': return PythonClientInterface(project_id, dataset_id) raise RuntimeError('Unknown BigQuery Client Interface requested.') @@ -278,6 +284,57 @@ def ExecuteQuery( return json.loads(stdout)['performance'], details +class OdbcClientInterface(GenericClientInterface): + """ODBC Client Interface class for BigQuery.""" + + def SetProvisionedAttributes(self, benchmark_spec): + super().SetProvisionedAttributes(benchmark_spec) + self.project_id = re.split( + r'\.', benchmark_spec.edw_service.cluster_identifier + )[0] + self.dataset_id = re.split( + r'\.', benchmark_spec.edw_service.cluster_identifier + )[1] + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query.""" + # Install system dependencies + self.client_vm.InstallPackages('unixodbc unixodbc-dev unzip libcurl4-openssl-dev libssl-dev') + + # Push the service account file and client binary to runner VM + self.client_vm.InstallPreprovisionedPackageData( + package_name, [FLAGS.gcp_service_account_key_file, BQ_ODBC_CLIENT_FILE], '' + ) + self.client_vm.RemoteCommand(f'chmod +x {BQ_ODBC_CLIENT_FILE}') + + # Dynamically download and extract Google ODBC Driver + driver_url = 'https://storage.googleapis.com/bq-driver-releases/odbc/ODBCDriverforBigQuery_linux_latest.zip' + self.client_vm.RemoteCommand(f'wget -q -O driver.zip {driver_url}') + self.client_vm.RemoteCommand('mkdir -p opt/google-odbc && unzip -q driver.zip -d opt/google-odbc/') + + # Find the driver library and register it in odbcinst.ini + odbcinst_setup = ( + 'DRIVER_PATH=$(find $(pwd)/opt/google-odbc/ -name "libgoogle_cloud_odbc_bq_driver.so" | head -n 1) && ' + 'echo "[Google BigQuery ODBC Driver]\\nDescription=Google BigQuery ODBC Driver\\nDriver=$DRIVER_PATH\\n" | sudo tee /etc/odbcinst.ini' + ) + self.client_vm.RemoteCommand(odbcinst_setup) + + + def ExecuteQuery( + self, query_name: str, print_results: bool = False + ) -> tuple[float, dict[str, Any]]: + """Executes a query and returns performance details.""" + query_command = ( + f'./{BQ_ODBC_CLIENT_FILE} --project {self.project_id} ' + f'--credentials_file {FLAGS.gcp_service_account_key_file} ' + f'--dataset {self.dataset_id} --query_file {query_name}' + ) + stdout, _ = self.client_vm.RemoteCommand(query_command) + details = copy.copy(self.GetMetadata()) + details.update(json.loads(stdout)['details']) + return json.loads(stdout)['performance'], details + + class JavaClientInterface(GenericClientInterface): """Native Java Client Interface class for BigQuery. diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py index a56fe72b99..0ce79eda87 100644 --- a/perfkitbenchmarker/providers/gcp/flags.py +++ b/perfkitbenchmarker/providers/gcp/flags.py @@ -421,6 +421,7 @@ 'JAVA', 'SIMBA_JDBC', 'GOOGLE_JDBC', + 'GOOGLE_ODBC', 'PYTHON', ], 'The Runtime Interface used when interacting with BigQuery.',