diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 427359befa73..15b074bf647e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,16 +16,16 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.7.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 # version-scanner: ignore + rev: 6.1.0 # version-scanner: ignore hooks: - id: flake8 diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py new file mode 100644 index 000000000000..1bde8d748638 --- /dev/null +++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py @@ -0,0 +1,138 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# DO NOT MODIFY THIS FILE DIRECTLY. 
+# This file was generated from: scripts/data/sql-functions/aead.yaml +# by the script: scripts/generate_bigframes_bigquery.py + +from __future__ import annotations + +import datetime +from typing import Any, Optional, TypeVar, Union + +import bigframes.core.col +import bigframes.core.expression as ex +import bigframes.core.sentinels as sentinels +import bigframes.operations as ops +import bigframes.series as series +from bigframes import dtypes +from bigframes.operations import googlesql + +T = TypeVar("T", series.Series, bigframes.core.col.Expression) + +_DECRYPT_BYTES_OP = googlesql.GoogleSqlScalarOp( + "AEAD.DECRYPT_BYTES", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.BYTES_DTYPE, +) +_DECRYPT_STRING_OP = googlesql.GoogleSqlScalarOp( + "AEAD.DECRYPT_STRING", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.STRING_DTYPE, +) +_ENCRYPT_OP = googlesql.GoogleSqlScalarOp( + "AEAD.ENCRYPT", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.BYTES_DTYPE, +) + + +def _apply_googlesql_op( + op: googlesql.GoogleSqlScalarOp, + *args: Any, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Applies a GoogleSQL scalar operator to the given arguments. + + Handles a mix of Series, Expression, and literal inputs. 
+ """ + # Find the first Series to use for alignment + first_series = None + for arg in args: + if isinstance(arg, series.Series): + first_series = arg + break + + if first_series is not None: + processed_args = [] + block = first_series._block + for arg in args: + if isinstance(arg, bigframes.core.col.Expression): + # Project expression onto the block + block, col_id = block.project_expr(arg._expr) + processed_args.append(series.Series(block.select_column(col_id))) + elif arg is sentinels.DEFAULT: + # OmittedArg is handled by GoogleSqlScalarOp in compiler + processed_args.append(bigframes.core.col.Expression(ex.OmittedArg())) + else: + processed_args.append(arg) + + # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals. + result = first_series._apply_nary_op(op, processed_args, ignore_self=True) + result.name = None + return result + + # No Series, return an Expression + expr_args = [] + for arg in args: + if isinstance(arg, bigframes.core.col.Expression): + expr_args.append(arg._expr) + elif arg is sentinels.DEFAULT: + expr_args.append(ex.OmittedArg()) + else: + expr_args.append(ex.const(arg)) + + return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args))) + + +def decrypt_bytes( + keyset: Union[T, Union[bytes, dict]], + ciphertext: Union[T, bytes], + additional_data: Union[T, bytes], +) -> T: + """Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. 
Returns an error if decryption or verification fails.""" + return _apply_googlesql_op( + _DECRYPT_BYTES_OP, + keyset, + ciphertext, + additional_data, + ) # type: ignore + + +def decrypt_string( + keyset: Union[T, Union[bytes, dict]], + ciphertext: Union[T, bytes], + additional_data: Union[T, str], +) -> T: + """Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING.""" + return _apply_googlesql_op( + _DECRYPT_STRING_OP, + keyset, + ciphertext, + additional_data, + ) # type: ignore + + +def encrypt( + keyset: Union[T, Union[bytes, dict]], + plaintext: Union[T, Union[bytes, str]], + additional_data: Union[T, Union[bytes, str]], +) -> T: + """Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL.""" + return _apply_googlesql_op( + _ENCRYPT_OP, + keyset, + plaintext, + additional_data, + ) # type: ignore diff --git a/packages/bigframes/bigframes/core/sentinels.py b/packages/bigframes/bigframes/core/sentinels.py new file mode 100644 index 000000000000..fc2bfac970e5 --- /dev/null +++ b/packages/bigframes/bigframes/core/sentinels.py @@ -0,0 +1,33 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Sentinel values used throughout BigFrames.""" + +from __future__ import annotations + +from enum import Enum + + +class Default(Enum): + """Default values used throughout BigFrames. + + When a parameter is set to this, that parameter is explicitly omitted + from the SQL text. This allows for NULL (None in Python) to be explicitly + passed in to optional parameters. + """ + + token = 0 + + +DEFAULT = Default.token diff --git a/packages/bigframes/scripts/data/sql-functions/aead.yaml b/packages/bigframes/scripts/data/sql-functions/aead.yaml new file mode 100644 index 000000000000..6c289a96e886 --- /dev/null +++ b/packages/bigframes/scripts/data/sql-functions/aead.yaml @@ -0,0 +1,131 @@ +urn: extension:google:bq_scalar_functions +scalar_functions: + - name: "aead.decrypt_bytes" + description: "Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails." + impls: + # Signature: aead.decrypt_bytes:vbin_vbin_vbin + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary + # Signature: aead.decrypt_bytes:struct_vbin_vbin + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary + - name: "aead.decrypt_string" + description: "Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING." 
+ impls: + # Signature: aead.decrypt_string:vbin_vbin_str + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: string + # Signature: aead.decrypt_string:struct_vbin_str + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: string + - name: "aead.encrypt" + description: "Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL." + impls: + # Signature: aead.encrypt:vbin_str_str + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "plaintext" + value: string + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: binary + # Signature: aead.encrypt:vbin_vbin_vbin + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "plaintext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary + # Signature: aead.encrypt:struct_str_str + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "plaintext" + value: string + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: binary + # Signature: aead.encrypt:struct_vbin_vbin + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "plaintext" + value: 
binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py new file mode 100755 index 000000000000..b15aace5f17c --- /dev/null +++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py @@ -0,0 +1,301 @@ +#!/usr/bin/env -S uv run --script +# +# /// script +# dependencies = [ +# "jinja2", +# "pyyaml", +# ] +# /// +# +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib +import re +import yaml +import jinja2 + +# Directory containing the YAML files +DATA_DIR = pathlib.Path("scripts/data/sql-functions") +# Directory where the generated Python files will be placed +OUTPUT_DIR = pathlib.Path("bigframes/bigquery/_operations") + +LICENSE_HEADER = """# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" + +TEMPLATE = """{{ license_header }} +# +# DO NOT MODIFY THIS FILE DIRECTLY. +# This file was generated from: {{ yaml_path }} +# by the script: {{ script_path }} + +from __future__ import annotations + +import datetime +from typing import Any, Optional, TypeVar, Union + +import bigframes.core.col +import bigframes.core.expression as ex +import bigframes.core.sentinels as sentinels +import bigframes.operations as ops +import bigframes.series as series +from bigframes import dtypes +from bigframes.operations import googlesql + +T = TypeVar("T", series.Series, bigframes.core.col.Expression) + +{% for op in ops %} +{{ op.internal_name }} = googlesql.GoogleSqlScalarOp( + "{{ op.sql_name }}", + args=({{ op.arg_specs }}), + signature={{ op.signature }}, +) +{% endfor %} + +def _apply_googlesql_op( + op: googlesql.GoogleSqlScalarOp, + *args: Any, +) -> Union[series.Series, bigframes.core.col.Expression]: + \"\"\"Applies a GoogleSQL scalar operator to the given arguments. + + Handles a mix of Series, Expression, and literal inputs. + \"\"\" + # Find the first Series to use for alignment + first_series = None + for arg in args: + if isinstance(arg, series.Series): + first_series = arg + break + + if first_series is not None: + processed_args = [] + block = first_series._block + for arg in args: + if isinstance(arg, bigframes.core.col.Expression): + # Project expression onto the block + block, col_id = block.project_expr(arg._expr) + processed_args.append(series.Series(block.select_column(col_id))) + elif arg is sentinels.DEFAULT: + # OmittedArg is handled by GoogleSqlScalarOp in compiler + processed_args.append(bigframes.core.col.Expression(ex.OmittedArg())) + else: + processed_args.append(arg) + + # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals. 
+ result = first_series._apply_nary_op(op, processed_args, ignore_self=True) + result.name = None + return result + + # No Series, return an Expression + expr_args = [] + for arg in args: + if isinstance(arg, bigframes.core.col.Expression): + expr_args.append(arg._expr) + elif arg is sentinels.DEFAULT: + expr_args.append(ex.OmittedArg()) + else: + expr_args.append(ex.const(arg)) + + return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args))) + +{% for func in functions %} +def {{ func.name }}( +{% for arg in func.args %} + {{ arg.name }}: Union[T, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %}, +{% endfor %} +) -> T: + \"\"\"{{ func.description }}\"\"\" + return _apply_googlesql_op( + {{ func.op_name }}, +{% for arg in func.args %} + {{ arg.name }}, +{% endfor %} + ) # type: ignore + +{% endfor %} +""" + +DTYPE_MAP = { + "binary": "dtypes.BYTES_DTYPE", + "string": "dtypes.STRING_DTYPE", + "int64": "dtypes.INT_DTYPE", + "float64": "dtypes.FLOAT_DTYPE", + "bool": "dtypes.BOOL_DTYPE", + "geography": "dtypes.GEO_DTYPE", + "json": "dtypes.JSON_DTYPE", + "date": "dtypes.DATE_DTYPE", + "time": "dtypes.TIME_DTYPE", + "datetime": "dtypes.DATETIME_DTYPE", + "timestamp": "dtypes.TIMESTAMP_DTYPE", +} + +PY_TYPE_MAP = { + "binary": "bytes", + "string": "str", + "int64": "int", + "float64": "float", + "bool": "bool", + "geography": "Any", + "json": "Any", + "date": "datetime.date", + "time": "datetime.time", + "datetime": "datetime.datetime", + "timestamp": "datetime.datetime", + "struct": "dict", +} + + +def to_snake_case(name): + # Replace dots with underscores + name = name.replace(".", "_") + # Handle CamelCase to snake_case + name = re.sub(r"(? 1 + else types[0] + ) + default = "sentinels.DEFAULT" if arg_info["optional"] else "" + func_args.append( + { + "name": name, + "type_hint": type_hint, + "default": default, + } + ) + + # Clean up default values for mandatory args + # In Python, mandatory args come first. 
+ for arg in func_args: + if not arg["default"]: + del arg["default"] + + functions_list.append( + { + "name": python_name, + "op_name": internal_op_name, + "description": func_data["description"], + "args": func_args, + } + ) + + # Render and write + output_file.parent.mkdir(parents=True, exist_ok=True) + content = template.render( + license_header=LICENSE_HEADER, + yaml_path=str(yaml_file), + script_path="scripts/generate_bigframes_bigquery.py", + ops=ops_list, + functions=functions_list, + ) + with open(output_file, "w") as f: + f.write(content) + print(f" Generated {output_file}") + + +if __name__ == "__main__": + main() diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock new file mode 100644 index 000000000000..0d28e42101bd --- /dev/null +++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock @@ -0,0 +1,77 @@ +version = 1 +revision = 3 +requires-python = ">=3.14" + +[manifest] +requirements = [ + { name = "jinja2" }, + { name = "pyyaml" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { 
url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] diff --git a/packages/bigframes/specs/bigframes-bigquery-generator.md 
b/packages/bigframes/specs/bigframes-bigquery-generator.md new file mode 100644 index 000000000000..26d1d45c9f51 --- /dev/null +++ b/packages/bigframes/specs/bigframes-bigquery-generator.md @@ -0,0 +1,101 @@ +# Code generation for bigframes.bigquery + +This document describes code generation for the `bigframes.bigquery` modules. +For detailed specifications on input and output types, refer to +[Contributing to bigframes.bigquery](./bigframes-bigquery-contributing.md). + +## Overview + +The script at `packages/bigframes/scripts/generate_bigframes_bigquery.py` +generates Python submodules for the `bigframes.bigquery` module. When run +without any arguments, it iterates through all YAML files at +`packages/bigframes/scripts/data/sql-functions/**/*.yaml` to generate the code. + +The script at `packages/bigframes/scripts/check_bigframes_bigquery.py` iterates +through all the same YAML files and checks that the functions have been included +in the `bigframes.bigquery` module, as the `__init__.py` file requires manual +updates. + +## Running the generator + +Since the dependencies for the script differ from those of bigframes +and its test suite, use the self-contained Python script technique described at +https://docs.astral.sh/uv/guides/scripts/ +to automatically manage dependencies using `uv`. Therefore, the header of the +script will look something like: + +```python +#!/usr/bin/env -S uv run --script +# +# /// script +# dependencies = [ +# "jinja2", +# "pyyaml", +# ] +# /// +# +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# ... +``` + +To run the script: + +```bash +cd packages/bigframes +uv run scripts/generate_bigframes_bigquery.py +``` + +To improve reproducibility, we also check in the uv lock file generated by +running `uv lock --script scripts/generate_bigframes_bigquery.py`. 
+ +## Generated code organization + +The `generate_bigframes_bigquery.py` script generates submodules of +`bigframes.bigquery._operations`, with the full path reflecting the organization +of the YAML files. For example, a YAML file at +`packages/bigframes/scripts/data/sql-functions/aead.yaml` corresponds to a +generated Python module at `bigframes.bigquery._operations.aead`. Likewise, +`packages/bigframes/scripts/data/sql-functions/builtins/bit.yaml` corresponds +to the `bigframes.bigquery._operations.builtins.bit` submodule. + +## Generated module implementation + +Each generated module has all functions defined in the YAML file converted to +the equivalent Python definition, including keyword arguments and docstrings. + +### Code generation + +The code will be templated using the Jinja2 template engine. This allows +proposed changes to the templated code to be reviewed more easily. + +### Handling optional arguments + +When the user calls a Python function without specifying an optional +argument, that argument is omitted from the SQL text. To allow for explicit +NULL values to be passed in (None in Python), the default value is the +sentinel enum member `bigframes.core.sentinels.DEFAULT`. For +example: + +```python +import bigframes.core.sentinels + +def current_date( + time_zone_expression: str | bigframes.core.sentinels.Default = bigframes.core.sentinels.DEFAULT, +): + ... +``` + +### Input and output types + +Refer to the table in +[Contributing to bigframes.bigquery](./bigframes-bigquery-contributing.md). + +### Internal bigframes operator + +Scalar functions should generate an expression using `GoogleSqlScalarOp`. +This keeps the implementation of scalar SQL functions consistent. + +Aggregate, analytic, and table-valued functions currently require custom ops. As +such, those functions are currently out of scope for this generator.