Skip to content

Commit 212efc0

Browse files
committed
Clarify virtual catalog
1 parent 8d9c675 commit 212efc0

6 files changed

Lines changed: 127 additions & 13 deletions

File tree

docs/integrations/engines/clickhouse.md

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,45 @@ If a model has many records in each partition, you may see additional performanc
420420

421421
Choose a model's time partitioning granularity based on the characteristics of the data it will process, making sure the total number of partitions is 1000 or fewer.
422422

423+
## Multi-gateway setup
424+
425+
ClickHouse does not have a catalog concept — its fully-qualified names (FQNs) for tables are two-level (`database.table`), not three-level (`catalog.database.table`).
426+
427+
When a SQLMesh project uses ClickHouse alongside a catalog-aware gateway such as Trino or BigQuery, the two gateway types produce FQNs with different nesting depths. SQLMesh's internal schema tracking requires uniform nesting, so it assigns a **virtual catalog** to ClickHouse models at load time.
428+
429+
### How the virtual catalog works
430+
431+
- SQLMesh automatically detects the nesting mismatch and injects a virtual catalog into each ClickHouse adapter when a catalog-aware gateway is also present.
432+
- ClickHouse models will appear with three-level FQNs in `sqlmesh plan` output and logs — for example, `__ch_prod__.mydb.mytable` for a gateway named `ch_prod`.
433+
- The virtual catalog prefix is **never sent to ClickHouse**. It is stripped from every DDL and DML statement before execution.
434+
- When ClickHouse is the only gateway in a project, no virtual catalog is assigned and models remain two-level.
435+
436+
### Adding a second gateway to an existing ClickHouse-only project
437+
438+
If your project previously used ClickHouse as the only gateway, your models were fingerprinted with two-level FQNs (`db.table`). Adding a catalog-aware gateway for the first time causes all ClickHouse models to be treated as new versions (their FQNs change to `__{gateway_name}__.db.table`), triggering a full re-materialization the next time a `sqlmesh plan` is applied. This is a one-time cost.
439+
440+
### Virtual catalog naming
441+
442+
By default, the virtual catalog name is derived from **the gateway name you chose in your config**, wrapped in double underscores — for example, a gateway named `clickhouse` produces `__clickhouse__`, and a gateway named `ch_prod` produces `__ch_prod__`. The double-underscore wrapping makes it visually clear that this is an internal SQLMesh concept, not a real ClickHouse object.
443+
444+
You can override the default name by setting `virtual_catalog` in your ClickHouse connection configuration:
445+
446+
```yaml
447+
gateways:
448+
clickhouse:
449+
connection:
450+
type: clickhouse
451+
host: my-clickhouse-host
452+
username: default
453+
virtual_catalog: ch_virtual # optional; defaults to __{gateway_name}__ (e.g. __clickhouse__)
454+
trino:
455+
connection:
456+
type: trino
457+
...
458+
```
459+
460+
With this configuration, ClickHouse models will appear as `ch_virtual.mydb.mytable` in plan output instead of `__clickhouse__.mydb.mytable`.
461+
423462
## Local/Built-in Scheduler
424463

425464
**Engine Adapter Type**: `clickhouse`
@@ -446,4 +485,5 @@ If a model has many records in each partition, you may see additional performanc
446485
| `server_host_name` | The ClickHouse server hostname as identified by the CN or SNI of its TLS certificate. Set this to avoid SSL errors when connecting through a proxy or tunnel with a different hostname. | string | N |
447486
| `tls_mode` | Controls advanced TLS behavior. `proxy` and `strict` do not invoke a ClickHouse mutual TLS connection, but do send the client cert and key. `mutual` assumes ClickHouse mutual TLS auth with a client certificate. | string | N |
448487
| `connection_settings` | Additional [connection settings](https://clickhouse.com/docs/integrations/python#settings-argument) | dict | N |
449-
| `connection_pool_options` | Additional [options](https://clickhouse.com/docs/integrations/python#customizing-the-http-connection-pool) for the HTTP connection pool | dict | N |
488+
| `connection_pool_options` | Additional [options](https://clickhouse.com/docs/integrations/python#customizing-the-http-connection-pool) for the HTTP connection pool | dict | N |
489+
| `virtual_catalog` | Override the virtual catalog name used when ClickHouse runs alongside a catalog-aware gateway (e.g. Trino). Defaults to `__{gateway_name}__`. See [Multi-gateway setup](#multi-gateway-setup) for details. | string | N |

sqlmesh/core/config/connection.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2085,6 +2085,7 @@ class ClickhouseConnectionConfig(ConnectionConfig):
20852085
password: t.Optional[str] = None
20862086
port: t.Optional[int] = None
20872087
cluster: t.Optional[str] = None
2088+
virtual_catalog: t.Optional[str] = None
20882089
connect_timeout: int = 10
20892090
send_receive_timeout: int = 300
20902091
query_limit: int = 0
@@ -2180,7 +2181,11 @@ def cloud_mode(self) -> bool:
21802181

21812182
@property
21822183
def _extra_engine_config(self) -> t.Dict[str, t.Any]:
2183-
return {"cluster": self.cluster, "cloud_mode": self.cloud_mode}
2184+
return {
2185+
"cluster": self.cluster,
2186+
"cloud_mode": self.cloud_mode,
2187+
"virtual_catalog": self.virtual_catalog,
2188+
}
21842189

21852190
@property
21862191
def _static_connection_kwargs(self) -> t.Dict[str, t.Any]:

sqlmesh/core/config/scheduler.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,11 @@ def get_default_catalog_per_gateway(self, context: GenericContext) -> t.Dict[str
155155
for gateway, adapter in unsupported_gateways:
156156
if adapter.supports_virtual_catalog():
157157
adapter.inject_virtual_catalog(gateway)
158-
default_catalogs_per_gateway[gateway] = gateway
158+
# Read the actual virtual catalog name back from the adapter — it may differ
159+
# from the gateway name if the user configured a custom virtual_catalog value.
160+
# inject_virtual_catalog() always sets _default_catalog so default_catalog
161+
# cannot return None at this point.
162+
default_catalogs_per_gateway[gateway] = adapter.default_catalog # type: ignore[assignment]
159163

160164
return default_catalogs_per_gateway
161165

sqlmesh/core/engine_adapter/clickhouse.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ def catalog_support(self) -> CatalogSupport:
5656
def supports_virtual_catalog(self) -> bool:
5757
return True
5858

59-
def inject_virtual_catalog(self, catalog: str) -> None:
60-
self._default_catalog = catalog
59+
def inject_virtual_catalog(self, gateway: str) -> None:
60+
configured = self._extra_config.get("virtual_catalog")
61+
self._default_catalog = f"__{gateway}__" if configured is None else configured
6162

6263
@property
6364
def engine_run_mode(self) -> EngineRunMode:

tests/core/engine_adapter/test_clickhouse.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,14 +1423,15 @@ def test_virtual_catalog_ddl_stripping(make_mocked_engine_adapter: t.Callable):
14231423
from sqlmesh.core.engine_adapter.shared import CatalogSupport
14241424

14251425
assert adapter.catalog_support == CatalogSupport.SINGLE_CATALOG_ONLY
1426-
assert adapter._default_catalog == "clickhouse_gw"
1426+
# The default synthetic virtual catalog wraps the gateway name in double underscores.
1427+
assert adapter._default_catalog == "__clickhouse_gw__"
14271428

14281429
# create_schema with the virtual catalog prefix must strip the catalog and not raise
1429-
adapter.create_schema("clickhouse_gw.mydb")
1430+
adapter.create_schema("__clickhouse_gw__.mydb")
14301431
assert to_sql_calls(adapter) == ['CREATE DATABASE IF NOT EXISTS "mydb"']
14311432

14321433
# create_schema with a wrong catalog must raise SQLMeshError
1433-
with pytest.raises(SQLMeshError, match="clickhouse_gw"):
1434+
with pytest.raises(SQLMeshError, match="__clickhouse_gw__"):
14341435
adapter.create_schema("wrong_catalog.mydb")
14351436

14361437

@@ -1444,3 +1445,32 @@ def test_supports_virtual_catalog_returns_true():
14441445
)
14451446
assert adapter.supports_virtual_catalog() is True
14461447
assert adapter._default_catalog is None
1448+
1449+
1450+
def test_inject_virtual_catalog_uses_custom_config(make_mocked_engine_adapter: t.Callable):
1451+
"""When virtual_catalog is set in _extra_config, inject_virtual_catalog uses that value
1452+
instead of the synthetic __gateway_name__ default."""
1453+
adapter = make_mocked_engine_adapter(
1454+
ClickhouseEngineAdapter,
1455+
virtual_catalog="my_custom_catalog",
1456+
)
1457+
1458+
adapter.inject_virtual_catalog("clickhouse_gw")
1459+
1460+
# The user-configured value must take precedence over the synthetic default.
1461+
assert adapter._default_catalog == "my_custom_catalog"
1462+
1463+
from sqlmesh.core.engine_adapter.shared import CatalogSupport
1464+
1465+
assert adapter.catalog_support == CatalogSupport.SINGLE_CATALOG_ONLY
1466+
1467+
1468+
def test_clickhouse_connection_config_virtual_catalog_extra_engine_config():
1469+
"""virtual_catalog set on ClickhouseConnectionConfig must appear in _extra_engine_config
1470+
so that the value reaches the adapter's _extra_config dict."""
1471+
from sqlmesh.core.config.connection import ClickhouseConnectionConfig
1472+
1473+
config = ClickhouseConnectionConfig(
1474+
host="localhost", username="user", virtual_catalog="my_catalog"
1475+
)
1476+
assert config._extra_engine_config.get("virtual_catalog") == "my_catalog"

tests/core/test_context.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,12 +446,12 @@ def test_multi_gateway_catalog_aware_and_unsupported(tmp_path: Path, mocker):
446446
assert "duckdb_gw" in catalog_per_gw
447447
# DuckDB's default catalog is the database filename without extension.
448448
assert catalog_per_gw["duckdb_gw"] == "db"
449-
# ClickHouse gateway must now also have a virtual catalog equal to its gateway name.
449+
# ClickHouse gateway must now also have a virtual catalog wrapped in double underscores.
450450
assert "clickhouse_gw" in catalog_per_gw
451-
assert catalog_per_gw["clickhouse_gw"] == "clickhouse_gw"
451+
assert catalog_per_gw["clickhouse_gw"] == "__clickhouse_gw__"
452452

453-
# The ClickHouse adapter's _default_catalog must be mutated to the virtual catalog name.
454-
assert ch_adapter._default_catalog == "clickhouse_gw"
453+
# The ClickHouse adapter's _default_catalog must be mutated to the synthetic virtual catalog.
454+
assert ch_adapter._default_catalog == "__clickhouse_gw__"
455455

456456
# The adapter's catalog_support must now be SINGLE_CATALOG_ONLY (not UNSUPPORTED),
457457
# so that the set_catalog decorator strips the virtual catalog instead of raising.
@@ -464,7 +464,7 @@ def test_multi_gateway_catalog_aware_and_unsupported(tmp_path: Path, mocker):
464464
)
465465
ch_model = load_sql_based_model(
466466
parse("MODEL(name mydb.ch_tbl, kind FULL, gateway clickhouse_gw);\nSELECT 1 AS col"),
467-
default_catalog="clickhouse_gw",
467+
default_catalog="__clickhouse_gw__",
468468
)
469469

470470
# Both models must have 3-level FQNs so MappingSchema nesting is uniform.
@@ -507,6 +507,40 @@ def test_single_gateway_clickhouse_no_virtual_catalog(mocker):
507507
assert ch_adapter.catalog_support == CatalogSupport.UNSUPPORTED
508508

509509

510+
def test_multi_gateway_clickhouse_custom_virtual_catalog(tmp_path: Path, mocker):
511+
"""When virtual_catalog is configured on the ClickHouse connection, that value is used as the
512+
virtual catalog instead of the synthetic __gateway_name__ default."""
513+
from sqlmesh.core.config.scheduler import BuiltInSchedulerConfig
514+
from sqlmesh.core.engine_adapter.clickhouse import ClickhouseEngineAdapter
515+
from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter
516+
from sqlmesh.core.engine_adapter.shared import CatalogSupport
517+
518+
db_path = str(tmp_path / "db.db")
519+
520+
duck_adapter = DuckDBEngineAdapter(
521+
lambda *a, **k: __import__("duckdb").connect(db_path),
522+
dialect="duckdb",
523+
)
524+
525+
# Pass virtual_catalog via _extra_config (the same path used by ClickhouseConnectionConfig).
526+
ch_adapter = ClickhouseEngineAdapter(
527+
lambda *a, **k: mocker.NonCallableMock(),
528+
dialect="clickhouse",
529+
virtual_catalog="my_custom_catalog",
530+
)
531+
532+
ctx_mock = mocker.MagicMock()
533+
ctx_mock.engine_adapters = {"duckdb_gw": duck_adapter, "clickhouse_gw": ch_adapter}
534+
535+
scheduler = BuiltInSchedulerConfig()
536+
catalog_per_gw = scheduler.get_default_catalog_per_gateway(ctx_mock)
537+
538+
# The configured virtual_catalog value must be used, not __clickhouse_gw__.
539+
assert catalog_per_gw["clickhouse_gw"] == "my_custom_catalog"
540+
assert ch_adapter._default_catalog == "my_custom_catalog"
541+
assert ch_adapter.catalog_support == CatalogSupport.SINGLE_CATALOG_ONLY
542+
543+
510544
def test_plan_execution_time():
511545
context = Context(config=Config())
512546
context.upsert_model(

0 commit comments

Comments
 (0)