From 77a2fda38e3800f702f9512ac0e0548cfb7f6338 Mon Sep 17 00:00:00 2001 From: aeluce Date: Wed, 20 May 2026 16:19:22 -0500 Subject: [PATCH 1/2] update estuary integration options --- .../{estuary.md => estuary/clickpipes.md} | 24 ++-- .../data-ingestion/etl-tools/estuary/index.md | 44 ++++++ .../etl-tools/estuary/native-protocol.md | 126 ++++++++++++++++++ sidebars.js | 16 ++- 4 files changed, 197 insertions(+), 13 deletions(-) rename docs/integrations/data-ingestion/etl-tools/{estuary.md => estuary/clickpipes.md} (79%) create mode 100644 docs/integrations/data-ingestion/etl-tools/estuary/index.md create mode 100644 docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md diff --git a/docs/integrations/data-ingestion/etl-tools/estuary.md b/docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md similarity index 79% rename from docs/integrations/data-ingestion/etl-tools/estuary.md rename to docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md index bd164421dc6..d2fc158b083 100644 --- a/docs/integrations/data-ingestion/etl-tools/estuary.md +++ b/docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md @@ -1,8 +1,8 @@ --- -sidebar_label: 'Estuary' -slug: /integrations/estuary -description: 'Stream a variety of sources into ClickHouse with an Estuary integration' -title: 'Connect Estuary with ClickHouse' +sidebar_label: 'Connect with ClickPipes' +slug: /integrations/estuary/clickpipes +description: 'Set up an integration between Estuary and ClickHouse via ClickPipes' +title: 'Ingest Estuary Data via ClickPipes' doc_type: 'guide' integration: - support_level: 'partner' @@ -13,13 +13,15 @@ keywords: ['estuary', 'data ingestion', 'etl', 'pipeline', 'data integration', ' import PartnerBadge from '@theme/badges/PartnerBadge'; -# Connect Estuary with ClickHouse +# Ingest Data from Estuary with ClickPipes -[Estuary](https://estuary.dev/) is a right-time data platform that flexibly combines real-time and batch data in simple-to-setup 
ETL pipelines. With enterprise-grade security and deployment options, Estuary unlocks durable data flows from SaaS, database, and streaming sources to a variety of destinations, including ClickHouse. +Estuary can connect with ClickHouse via the Kafka ClickPipe. -Estuary connects with ClickHouse via the Kafka ClickPipe. You don't need to maintain your own Kafka ecosystem with this integration. +You don't need to maintain your own Kafka ecosystem with this integration. Instead, Estuary emits new data like Kafka messages. You can configure a Kafka ClickPipe to use Estuary's broker and schema registry information to consume these messages. + +See also [Estuary's direct ClickHouse integration](/integrations/estuary/native). ## Setup guide {#setup-guide} @@ -39,7 +41,7 @@ To move data from your source collections in Estuary to ClickHouse, you will fir 2. Click **+ New Materialization**. -3. Select the **ClickHouse** connector. +3. Select the **ClickHouse Kafka API** connector. 4. Fill out details in the Materialization, Endpoint, and Source Collections sections: @@ -101,10 +103,8 @@ ClickHouse will provision your new data source and start consuming messages from ## Additional resources {#additional-resources} -For more on setting up an integration with Estuary, see Estuary's documentation: +For more on setting up a ClickPipe integration with Estuary, see Estuary's documentation: -* Reference Estuary's [ClickHouse materialization docs](https://docs.estuary.dev/reference/Connectors/materialization-connectors/Dekaf/clickhouse/). +* Reference Estuary's [ClickHouse materialization docs](https://docs.estuary.dev/reference/Connectors/materialization-connectors/Dekaf/clickhouse/) for the ClickPipes integration. * Estuary exposes data as Kafka messages using **Dekaf**. You can learn more about Dekaf [here](https://docs.estuary.dev/guides/dekaf_reading_collections_from_kafka/). 
- -* To see a list of sources that you can stream into ClickHouse with Estuary, check out [Estuary's capture connectors](https://docs.estuary.dev/reference/Connectors/capture-connectors/). diff --git a/docs/integrations/data-ingestion/etl-tools/estuary/index.md b/docs/integrations/data-ingestion/etl-tools/estuary/index.md new file mode 100644 index 00000000000..1e6792f3656 --- /dev/null +++ b/docs/integrations/data-ingestion/etl-tools/estuary/index.md @@ -0,0 +1,44 @@ +--- +sidebar_label: 'Estuary' +slug: /integrations/estuary +description: 'Stream SaaS, database, and other sources into ClickHouse with an Estuary integration' +title: 'Connect Estuary with ClickHouse' +doc_type: 'guide' +integration: + - support_level: 'partner' + - category: 'data_ingestion' + - website: 'https://estuary.dev' +keywords: ['estuary', 'data ingestion', 'etl', 'pipeline', 'data integration'] +--- + +import PartnerBadge from '@theme/badges/PartnerBadge'; + +# Connect Estuary with ClickHouse + + + +[Estuary](https://estuary.dev/) is a right-time data platform that flexibly combines real-time and batch data in simple-to-setup ETL pipelines. With enterprise-grade security and deployment options, Estuary unlocks durable data flows from SaaS, database, and streaming sources to a variety of destinations, including ClickHouse. + +Estuary provides two main ways to integrate with ClickHouse: +* [Directly connect to your ClickHouse database](/integrations/estuary/native). +* [Connect via Kafka ClickPipes](/integrations/estuary/clickpipes). + +In both cases, Estuary handles data capture and movement. You don't need to maintain your own Kafka ecosystem or other infrastructure. + +## When to choose each integration {#choose-integration-type} + +Estuary's [direct ClickHouse materialization](/integrations/estuary/native) is recommended for most use cases. 
It is specifically designed to integrate with ClickHouse's native protocol and supports self-hosted deployments as well as ClickHouse Cloud instances. + +Opt for the [ClickPipe integration](/integrations/estuary/clickpipes) instead if you specifically want to manage your pipelines via ClickPipes. This allows you to handle incoming data like Kafka messages. + +## Additional resources {#additional-resources} + +For more on setting up an integration with Estuary, see Estuary's documentation: + +* [Explore Estuary's capabilities](https://docs.estuary.dev/). + +* See reference documentation for Estuary's [direct ClickHouse materialization connector](https://docs.estuary.dev/reference/Connectors/materialization-connectors/ClickHouse/). + +* See reference documentation for Estuary's [Kafka ClickPipe integration](https://docs.estuary.dev/reference/Connectors/materialization-connectors/Dekaf/clickhouse/). + +* To see a list of sources that you can stream into ClickHouse with Estuary, check out [Estuary's capture connectors](https://docs.estuary.dev/reference/Connectors/capture-connectors/). 
diff --git a/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md b/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md new file mode 100644 index 00000000000..47345006e57 --- /dev/null +++ b/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md @@ -0,0 +1,126 @@ +--- +sidebar_label: 'Direct Materialization Connector' +slug: /integrations/estuary/native +description: 'Integrate between Estuary and ClickHouse with a connector using the native protocol' +title: 'Direct Materialization from Estuary to ClickHouse' +doc_type: 'guide' +integration: + - support_level: 'partner' + - category: 'data_ingestion' + - website: 'https://estuary.dev' +keywords: ['estuary', 'data ingestion', 'etl', 'pipeline', 'data integration'] +--- + +import PartnerBadge from '@theme/badges/PartnerBadge'; + +# Estuary to ClickHouse Direct Materialization + + + +Estuary provides a direct materialization connector with ClickHouse that uses ClickHouse's [native protocol](/interfaces/tcp) and [native format](/interfaces/formats/Native). + +This allows Estuary to: +* Materialize data to both self-hosted and ClickHouse Cloud instances +* Automatically handle tasks like table creation and schema evolution +* Support soft or hard deletes +* Use `ReplacingMergeTree` for standard merge updates or `MergeTree` for delta updates +* Provide exactly-once delivery + +See also [Estuary's Kafka ClickPipe integration](/integrations/estuary/clickpipes) for a ClickPipe workflow. 
+ +## Setup guide {#setup-guide} + +**Prerequisites** + +* An [Estuary account](https://dashboard.estuary.dev/register) +* One or more [**captures**](https://docs.estuary.dev/concepts/captures/) in Estuary that pull data from your desired sources +* A ClickHouse instance, self-hosted or Cloud account +* A ClickHouse database user with credentials + + + +### Configure ClickHouse for integration {#1-configure-clickhouse} + +To set up Estuary's ClickHouse connector, you will need to gather some information from your ClickHouse instance and configure user permissions. + +1. Copy your database's host endpoint. + + For the port, use **9440** if TLS is enabled or **9000** if TLS is disabled. + + Together, the host and port will form the **address** you need to provide to Estuary. + +2. Grant permissions to the database user that Estuary will use to connect. + + To automatically create and manage tables for you, Estuary will need `CREATE`, `SELECT`, `INSERT`, and similar permissions on your target database as well as permissions for metadata discovery and partition management. + + You can grant all required permissions by running these SQL commands, replacing `<database>` and `<user>` with your own information: + + ```sql + -- Target database access: CREATE TABLE, DROP TABLE, SELECT, INSERT, TRUNCATE, etc. + GRANT ALL ON <database>.* TO <user>; + + -- System table access for metadata discovery and partition management. + -- These are NOT covered by the database grant above. + GRANT SELECT ON system.columns TO <user>; + GRANT SELECT ON system.parts TO <user>; + GRANT SELECT ON system.tables TO <user>; + ``` + +3. Optionally restrict user system access to only the target database. + + You can do so with row-level policies. 
For example: + + ```sql + CREATE ROW POLICY estuary_columns ON system.columns FOR SELECT USING database = '<database>' TO <user>; + CREATE ROW POLICY estuary_parts ON system.parts FOR SELECT USING database = '<database>' TO <user>; + CREATE ROW POLICY estuary_tables ON system.tables FOR SELECT USING database = '<database>' TO <user>; + ``` + +You can then move to Estuary to finish setup. + +### Create an Estuary materialization {#2-create-an-estuary-materialization} + +1. In Estuary's dashboard, go to the [Destinations](https://dashboard.estuary.dev/materializations) page. + +2. Click **+ New Materialization**. + +3. Select the **ClickHouse** connector. + +4. Fill out the **Materialization Details** section. + + * Provide a unique name for your materialization + * Choose a data plane (cloud provider and region) + +5. Fill out **Endpoint Config** details so Estuary can connect to your ClickHouse instance. + + * **Address:** the host and port of your instance + * **Database:** target database name + * **Authentication:** username and password for the database user + + You can also configure optional settings, such as whether to use hard deletes and the SSL mode to use. + +### Configure source collections {#3-configure-source-collections} + +Choose which source(s) you'd like to materialize into ClickHouse in the **Source Collections** section. + +1. Link an existing **capture** or add individual data collections to materialize to ClickHouse. + +2. Select a data collection from the list to configure further if necessary. Customization options include: + + * Choose a different table name for the collection + * Select merge behavior for the collection (whether to use delta updates mode) + * Customize field selection behavior to control which fields are materialized + +3. Once you're happy with how data will be materialized to ClickHouse, click **Next** and **Save and Publish**. + +Estuary will start backfilling data from the selected collections to ClickHouse and then stream updates as they occur. 
+ + + +## Additional resources {#additional-resources} + +For more on setting up a ClickHouse connector with Estuary, see Estuary's documentation: + +* Reference Estuary's [ClickHouse materialization docs](https://docs.estuary.dev/reference/Connectors/materialization-connectors/ClickHouse/). + +* Besides the UI-based workflow provided in these instructions, you can also manage pipeline setup with Estuary via CLI. See Estuary's [guides on `flowctl`](https://docs.estuary.dev/guides/flowctl/ci-cd/) for more on working with Estuary programmatically. diff --git a/sidebars.js b/sidebars.js index 3a2dbf612a6..f56deac0c26 100644 --- a/sidebars.js +++ b/sidebars.js @@ -1149,7 +1149,21 @@ const sidebars = { ], }, 'integrations/data-ingestion/etl-tools/dlt-and-clickhouse', - 'integrations/data-ingestion/etl-tools/estuary', + { + type: 'category', + label: 'Estuary', + className: 'top-nav-item', + collapsed: true, + collapsible: true, + link: { + type: 'doc', + id: 'integrations/data-ingestion/etl-tools/estuary/index', + }, + items: [ + 'integrations/data-ingestion/etl-tools/estuary/native-protocol', + 'integrations/data-ingestion/etl-tools/estuary/clickpipes', + ], + }, { type: 'category', label: 'Fivetran', From dd94fd44fc0d2a809ec12ce6d20578ab4b5430d0 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 27 May 2026 19:43:34 +0200 Subject: [PATCH 2/2] minor tweaks --- .../data-ingestion/etl-tools/estuary/clickpipes.md | 8 ++++---- .../data-ingestion/etl-tools/estuary/index.md | 5 ----- .../etl-tools/estuary/native-protocol.md | 13 ++++++------- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md b/docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md index d2fc158b083..66b4ce4769f 100644 --- a/docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md +++ b/docs/integrations/data-ingestion/etl-tools/estuary/clickpipes.md @@ -2,7 +2,7 
@@ sidebar_label: 'Connect with ClickPipes' slug: /integrations/estuary/clickpipes description: 'Set up an integration between Estuary and ClickHouse via ClickPipes' -title: 'Ingest Estuary Data via ClickPipes' +title: 'Ingest Estuary data using ClickPipes' doc_type: 'guide' integration: - support_level: 'partner' @@ -13,20 +13,20 @@ keywords: ['estuary', 'data ingestion', 'etl', 'pipeline', 'data integration', ' import PartnerBadge from '@theme/badges/PartnerBadge'; -# Ingest Data from Estuary with ClickPipes - Estuary can connect with ClickHouse via the Kafka ClickPipe. You don't need to maintain your own Kafka ecosystem with this integration. Instead, Estuary emits new data like Kafka messages. You can configure a Kafka ClickPipe to use Estuary's broker and schema registry information to consume these messages. -See also [Estuary's direct ClickHouse integration](/integrations/estuary/native). +See [Estuary's direct ClickHouse integration](/integrations/estuary/native) for an alternative. ## Setup guide {#setup-guide} **Prerequisites** +You will need: + * An [Estuary account](https://dashboard.estuary.dev/register) * One or more [**captures**](https://docs.estuary.dev/concepts/captures/) in Estuary that pull data from your desired sources * A ClickHouse Cloud account with ClickPipe permissions diff --git a/docs/integrations/data-ingestion/etl-tools/estuary/index.md b/docs/integrations/data-ingestion/etl-tools/estuary/index.md index 1e6792f3656..53c3cadf506 100644 --- a/docs/integrations/data-ingestion/etl-tools/estuary/index.md +++ b/docs/integrations/data-ingestion/etl-tools/estuary/index.md @@ -13,8 +13,6 @@ keywords: ['estuary', 'data ingestion', 'etl', 'pipeline', 'data integration'] import PartnerBadge from '@theme/badges/PartnerBadge'; -# Connect Estuary with ClickHouse - [Estuary](https://estuary.dev/) is a right-time data platform that flexibly combines real-time and batch data in simple-to-setup ETL pipelines. 
With enterprise-grade security and deployment options, Estuary unlocks durable data flows from SaaS, database, and streaming sources to a variety of destinations, including ClickHouse. @@ -36,9 +34,6 @@ Opt for the [ClickPipe integration](/integrations/estuary/clickpipes) instead if For more on setting up an integration with Estuary, see Estuary's documentation: * [Explore Estuary's capabilities](https://docs.estuary.dev/). - * See reference documentation for Estuary's [direct ClickHouse materialization connector](https://docs.estuary.dev/reference/Connectors/materialization-connectors/ClickHouse/). - * See reference documentation for Estuary's [Kafka ClickPipe integration](https://docs.estuary.dev/reference/Connectors/materialization-connectors/Dekaf/clickhouse/). - * To see a list of sources that you can stream into ClickHouse with Estuary, check out [Estuary's capture connectors](https://docs.estuary.dev/reference/Connectors/capture-connectors/). diff --git a/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md b/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md index 47345006e57..d7a837fdb2b 100644 --- a/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md +++ b/docs/integrations/data-ingestion/etl-tools/estuary/native-protocol.md @@ -1,8 +1,8 @@ --- -sidebar_label: 'Direct Materialization Connector' +sidebar_label: 'Direct materialization connector' slug: /integrations/estuary/native description: 'Integrate between Estuary and ClickHouse with a connector using the native protocol' -title: 'Direct Materialization from Estuary to ClickHouse' +title: 'Direct materialization from Estuary to ClickHouse' doc_type: 'guide' integration: - support_level: 'partner' @@ -13,8 +13,6 @@ keywords: ['estuary', 'data ingestion', 'etl', 'pipeline', 'data integration'] import PartnerBadge from '@theme/badges/PartnerBadge'; -# Estuary to ClickHouse Direct Materialization - Estuary provides a direct materialization connector 
with ClickHouse that uses ClickHouse's [native protocol](/interfaces/tcp) and [native format](/interfaces/formats/Native). @@ -26,12 +24,14 @@ This allows Estuary to: * Use `ReplacingMergeTree` for standard merge updates or `MergeTree` for delta updates * Provide exactly-once delivery -See also [Estuary's Kafka ClickPipe integration](/integrations/estuary/clickpipes) for a ClickPipe workflow. +See [Estuary's Kafka ClickPipe integration](/integrations/estuary/clickpipes) for a ClickPipes workflow. ## Setup guide {#setup-guide} **Prerequisites** +You will need: + * An [Estuary account](https://dashboard.estuary.dev/register) * One or more [**captures**](https://docs.estuary.dev/concepts/captures/) in Estuary that pull data from your desired sources * A ClickHouse instance, self-hosted or Cloud account @@ -101,7 +101,7 @@ You can then move to Estuary to finish setup. ### Configure source collections {#3-configure-source-collections} -Choose which source(s) you'd like to materialize into ClickHouse in the **Source Collections** section. +Choose which sources you'd like to materialize into ClickHouse in the **Source Collections** section. 1. Link an existing **capture** or add individual data collections to materialize to ClickHouse. @@ -122,5 +122,4 @@ Estuary will start backfilling data from the selected collections to ClickHouse For more on setting up a ClickHouse connector with Estuary, see Estuary's documentation: * Reference Estuary's [ClickHouse materialization docs](https://docs.estuary.dev/reference/Connectors/materialization-connectors/ClickHouse/). - * Besides the UI-based workflow provided in these instructions, you can also manage pipeline setup with Estuary via CLI. See Estuary's [guides on `flowctl`](https://docs.estuary.dev/guides/flowctl/ci-cd/) for more on working with Estuary programmatically.