Skip to content

Commit a79957d

Browse files
ulemons and Umberto Sgueglia authored
feat: add fixtures for test tinybird pipes (#3349)
Co-authored-by: Umberto Sgueglia <ulemons@Mac.lan>
1 parent d8fa2f6 commit a79957d

10 files changed

Lines changed: 384 additions & 72 deletions

File tree

pnpm-lock.yaml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scripts/cli

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ BUILDERS=$(ls -p $CLI_HOME/builders | grep -v / | sed 's/\.[^.]*$//')
1818
DOCKER_COMPOSE_PROFILE=''
1919
DOCKER_COMPOSE_SCAFFOLD_FILES="-f $CLI_HOME/scaffold.yaml"
2020

21+
TB_HOST="http://localhost:7181"
22+
2123
if [[ -n "${WITH_NGINX}" ]]; then
2224
DOCKER_COMPOSE_PROFILE='--profile nginx'
2325
elif [[ "$WITH_INSIGHTS" == "1" ]]; then
@@ -554,22 +556,19 @@ function wait_for_tinybird() {
554556
say "Waiting for Tinybird to get ready.."
555557

556558
MAX_RETRIES=10
557-
RETRY_DELAY=5
559+
RETRY_DELAY=10
558560
attempt=1
559561

560562
while [ $attempt -le $MAX_RETRIES ]; do
561-
OUTPUT=$(tb local status)
562-
563-
echo "$OUTPUT"
563+
CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN=$(curl -s "$TB_HOST/tokens" | jq -r '.workspace_admin_token')
564564

565-
if [[ "$OUTPUT" == *"✓ Tinybird Local is ready!"* ]]; then
566-
say "Tinybird is ready!"
565+
if [ -n "$CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN" ]; then
566+
say "Tinybird local is ready!"
567567
break
568-
else
569-
whisper "Not ready yet. Retrying in $RETRY_DELAY seconds..."
570-
sleep $RETRY_DELAY
571568
fi
572569

570+
echo "Not ready yet. Retrying in $RETRY_DELAY seconds..."
571+
sleep $RETRY_DELAY
573572
attempt=$((attempt + 1))
574573
done
575574

@@ -582,14 +581,12 @@ function wait_for_tinybird() {
582581
function migrate_tinybird_local() {
583582
set +e +o pipefail
584583

585-
say "Waiting for Tinybird to be ready..."
586584
wait_for_tinybird
587585

588-
say "Authenticating to tinybird staging..."
589-
cd "$CLI_HOME/../services/libs/tinybird" && tb auth --region us-west-2
590-
586+
say "Authenticating to tinybird local..."
587+
cd "$CLI_HOME/../services/libs/tinybird" && tb auth --host "$TB_HOST" --token "$CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN"
588+
591589
set -eo pipefail
592-
return 1
593590
}
594591

595592

@@ -675,9 +672,6 @@ function install_tinybird_cli() {
675672
fi
676673
}
677674

678-
function set_tinybird_workspace_admin_token() {
679-
export CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN=$(tb --local token ls --match "admin token" | awk '/^token: / {print $2}')
680-
}
681675

682676
function up_scaffold() {
683677
scaffold_set_up_network "$PROJECT_NAME-bridge" $DOCKER_NETWORK_SUBNET $DOCKER_NETWORK_GATEWAY
@@ -687,16 +681,10 @@ function up_scaffold() {
687681
if [[ "$WITH_INSIGHTS" == "1" ]]; then
688682
download_kafka_connect_http "$CLI_HOME/scaffold/kafka-connect"
689683
install_tinybird_cli
690-
fi
691-
692-
693-
if [[ "$WITH_INSIGHTS" == "1" ]]; then
694684
# first up tinybird, then wait for it to be ready, then set the token env for kafka connect
695685
$_DC --compatibility -p $PROJECT_NAME $DOCKER_COMPOSE_SCAFFOLD_FILES $DOCKER_COMPOSE_PROFILE up -d --build tinybird
696-
wait_for_tinybird
697-
set_tinybird_workspace_admin_token
698686
fi
699-
687+
700688
$_DC --compatibility -p $PROJECT_NAME $DOCKER_COMPOSE_SCAFFOLD_FILES $DOCKER_COMPOSE_PROFILE up -d --build --no-recreate
701689

702690
}
@@ -855,7 +843,7 @@ while test $# -gt 0; do
855843
exit
856844
;;
857845
clean-start-dev)
858-
# INGORED_SERVICES=("python-worker" "job-generator" "discord-ws" "webhook-api" "profiles-worker" "organizations-enrichment-worker" "merge-suggestions-worker" "members-enrichment-worker" "exports-worker" "entity-merging-worker")
846+
INGORED_SERVICES=("python-worker" "job-generator" "discord-ws" "webhook-api" "profiles-worker" "organizations-enrichment-worker" "merge-suggestions-worker" "members-enrichment-worker" "exports-worker" "entity-merging-worker")
859847
CLEAN_START=1
860848
DEV=1
861849
start

scripts/scaffold.insights.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,13 @@ services:
4848

4949
tinybird:
5050
container_name: tinybird-local
51-
image: tinybirdco/tinybird-local:beta
51+
image: tinybirdco/tinybird-local:latest
52+
platform: "linux/amd64"
53+
environment:
54+
- COMPATIBILITY_MODE=1 # <— force Classic version
5255
ports:
5356
- "80:80"
54-
- "7181:7181"
57+
- "7181:7181"
5558
networks:
5659
- crowd-bridge
5760
profiles:

scripts/scaffold/kafka-connect/configure-tinybird-sink.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
set -e
88

9+
CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN=$(curl -s "http://tinybird:7181/tokens" | jq -r '.workspace_admin_token')
10+
911
if [ -z "$CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN" ] || [ "$CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN" = "null" ]; then
1012
echo "Error: Could not fetch Tinybird token. Exiting."
1113
exit 1

scripts/scaffold/kafka-connect/tinybird-local-sink.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ connector.class=io.lenses.streamreactor.connect.http.sink.HttpSinkConnector
33
tasks.max=1
44
topics=activities,activityRelations,members,memberIdentities,organizations,collections,insightsProjects,collectionsInsightsProjects
55
connect.http.method=POST
6-
connect.http.endpoint=http://tinybird:80/v0/events?name={{topic}}
6+
connect.http.endpoint=http://tinybird:7181/v0/events?name={{topic}}
77
connect.http.request.headers=Content-Type: application/json,Authorization: Bearer ${CROWD_TINYBIRD_WORKSPACE_ADMIN_TOKEN}
88
connect.http.batch.count=10
99
connect.http.time.interval=5

scripts/scaffold/sequin.yml

Lines changed: 1 addition & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -45,105 +45,63 @@ sinks:
4545
- name: members_sink
4646
status: active
4747
table: public.members
48-
filters: []
4948
destination:
5049
type: kafka
5150
hosts: kafka:9092
5251
tls: false
5352
topic: members
5453
database: cm-local
55-
max_deliver:
56-
group_column_names:
57-
- id
58-
consumer_start:
59-
position: beginning
6054
- name: activityRelations_sink
6155
status: active
6256
table: public.activityRelations
63-
filters: []
6457
destination:
6558
type: kafka
6659
hosts: kafka:9092
6760
tls: false
6861
topic: activityRelations
6962
database: cm-local
70-
max_deliver:
71-
group_column_names:
72-
- activityId
73-
consumer_start:
74-
position: beginning
7563
- name: segments_sink
7664
status: active
7765
table: public.segments
78-
filters: []
7966
destination:
8067
type: kafka
8168
hosts: kafka:9092
8269
tls: false
8370
topic: segments
8471
database: cm-local
85-
max_deliver:
86-
group_column_names:
87-
- id
88-
consumer_start:
89-
position: beginning
9072
- name: collections_sink
9173
status: active
9274
table: public.collections
93-
filters: []
9475
destination:
9576
type: kafka
9677
hosts: kafka:9092
9778
tls: false
9879
topic: collections
9980
database: cm-local
100-
max_deliver:
101-
group_column_names:
102-
- id
103-
consumer_start:
104-
position: beginning
10581
- name: insightsProjects_sink
10682
status: active
10783
table: public.insightsProjects
108-
filters: []
10984
destination:
11085
type: kafka
11186
hosts: kafka:9092
11287
tls: false
11388
topic: insightsProjects
11489
database: cm-local
115-
max_deliver:
116-
group_column_names:
117-
- id
118-
consumer_start:
119-
position: beginning
12090
- name: collectionsInsightsProjects_sink
12191
status: active
12292
table: public.collectionsInsightsProjects
123-
filters: []
12493
destination:
12594
type: kafka
12695
hosts: kafka:9092
12796
tls: false
12897
topic: collectionsInsightsProjects
12998
database: cm-local
130-
max_deliver:
131-
group_column_names:
132-
- id
133-
consumer_start:
134-
position: beginning
13599
- name: organizations_sink
136100
status: active
137101
table: public.organizations
138-
filters: []
139102
destination:
140103
type: kafka
141104
hosts: kafka:9092
142105
tls: false
143106
topic: organizations
144-
database: cm-local
145-
max_deliver:
146-
group_column_names:
147-
- id
148-
consumer_start:
149-
position: beginning
107+
database: cm-local
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
TB_HOST_LOCAL=
2+
TB_TOKEN_LOCAL=
3+
TB_HOST_STAGING=
4+
TB_TOKEN_STAGING=
5+
TB_HOST_PRODUCTION=
6+
TB_TOKEN_PRODUCTION=

services/libs/tinybird/README.md

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,118 @@ Until we move fully to **Tinybird Forward** (which will support migration script
154154
2. **Pipe-by-pipe updates** for zero downtime where #1 is not enough
155155

156156
Pick the method that best fits your workflow and datasource complexity.
157+
158+
# Testing Tinybird Pipes Locally
159+
160+
This guide explains how to test a Tinybird data pipeline ("pipe") on your local Tinybird environment. We will fetch sample data (fixtures) from a staging Tinybird workspace and use it to run and verify a pipe locally. The steps below are written for developers who may not be familiar with Tinybird and are numbered so they are easy to follow.
161+
162+
## Prerequisites
163+
164+
- **Tinybird CLI and Local Environment:** Make sure you have the Tinybird CLI (`tb`) installed.
165+
To start a local Tinybird instance, run the following Docker command:
166+
```bash
167+
docker run --platform linux/amd64 -p 7181:7181 --name tinybird-classic-local \
168+
-e COMPATIBILITY_MODE=1 -d tinybirdco/tinybird-local:latest
169+
```
170+
This will start Tinybird Local at `http://localhost:7181`.
171+
172+
- **Staging Workspace Access:** You need access to a Tinybird staging (or production) workspace with an API token that has permission to read data (we'll use it to export data).
173+
- **Project Files:** Ensure you have your Tinybird project files available locally – this includes the data source definitions and pipe (.pipe) files you plan to test. For example, if you plan to test a pipe that uses a data source named `insightsProjects`, you should have the corresponding `insightsProjects.datasource` file (or have created that data source) in your local workspace. Similarly, have the pipe file (e.g. `activities_filtered.pipe` and any related pipes) ready in your project directory.
174+
175+
## Steps to Test the Tinybird Integration Locally
176+
177+
1. **Set Up Environment Variables and Authenticate**
178+
Instead of exporting variables directly in the shell (which can leak tokens into your shell history), we recommend using a `.env` file.
179+
180+
First, copy the example file and fill in the values:
181+
```bash
182+
cp .env.example .env
183+
```
184+
185+
Edit `.env` and set your staging token. For the local token, you need to fetch it from the local Tinybird API with the following command:
186+
```bash
187+
curl -s "http://localhost:7181/tokens" | jq -r '.workspace_admin_token'
188+
```
189+
Copy the output and paste it into the `TB_TOKEN_LOCAL` entry inside `.env`.
190+
191+
Example `.env`:
192+
```bash
193+
TB_HOST_LOCAL=http://localhost:7181
194+
TB_TOKEN_LOCAL=eyJhbGciOiJIUzI1NiIsInR5cCI6...
195+
196+
TB_HOST_STAGING=https://api.us-west-2.aws.tinybird.co
197+
TB_TOKEN_STAGING=<YOUR_STAGING_TOKEN>
198+
199+
TB_HOST_PRODUCTION=https://api.us-west-2.aws.tinybird.co
200+
TB_TOKEN_PRODUCTION=<YOUR_PRODUCTION_TOKEN>
201+
```
202+
203+
Then load the environment variables into your current shell (Linux/macOS, or Windows with Git Bash/WSL):
204+
```bash
205+
set -a
206+
source .env
207+
set +a
208+
```
209+
210+
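Now you can authenticate with the Tinybird CLI. Run only the command for the workspace you want to target; each `tb auth` call switches the CLI to that workspace: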
211+
```bash
212+
# Authenticate to local Tinybird workspace
213+
tb auth --host "$TB_HOST_LOCAL" --token "$TB_TOKEN_LOCAL"
214+
215+
# Authenticate to staging Tinybird workspace
216+
tb auth --host "$TB_HOST_STAGING" --token "$TB_TOKEN_STAGING"
217+
218+
# Authenticate to production Tinybird workspace
219+
tb auth --host "$TB_HOST_PRODUCTION" --token "$TB_TOKEN_PRODUCTION"
220+
```
221+
222+
⚠️ **Note for Windows PowerShell users:**
223+
`source` and `set -a` are not available in PowerShell. We recommend using [dotenvx](https://dotenvx.sh/) or an equivalent approach:
224+
```powershell
225+
dotenvx run -f .env -- tb auth --host $env:TB_HOST_LOCAL --token $env:TB_TOKEN_LOCAL
226+
```
227+
228+
2. **Export Fixture Data from Staging**
229+
Next, fetch some sample data from the staging environment to use in your local test. We will use the Tinybird SQL API via a `curl` command to retrieve the data. In this example, we select up to 200 rows from the `insightsProjects` data source in the staging workspace and save them to a local file:
230+
```bash
231+
curl -s -H "Authorization: Bearer $TB_TOKEN_STAGING" \
232+
--data-urlencode "q=SELECT id, name, slug, description, segmentId, createdAt, updatedAt, deletedAt, logoUrl, organizationId, website, github, linkedin, twitter, widgets, repositories, enabled, isLF, keywords FROM insightsProjects LIMIT 200 FORMAT JSONEachRow" \
233+
"https://api.us-west-2.aws.tinybird.co/v0/sql" \
234+
| jq -c '{record: .}' > insightsProjects.ndjson
235+
```
236+
237+
⚠️ Check whether this data is already present in the `fixtures` folder. If it is not, place the output in the `fixtures` folder and commit it.
238+
**For production, use `$TB_TOKEN_PRODUCTION` instead of `$TB_TOKEN_STAGING`.**
239+
240+
3. **Switch Back to Local Workspace**
241+
Now that we have the fixture data, we need to switch the Tinybird CLI context back to the local environment before importing data and pushing pipes:
242+
```bash
243+
tb auth --host "$TB_HOST_LOCAL" --token "$TB_TOKEN_LOCAL"
244+
```
245+
246+
4. **Import the Fixture Data into Local Datasource**
247+
Append the data you downloaded to the corresponding data source in your local Tinybird workspace:
248+
```bash
249+
tb datasource append insightsProjects --file insightsProjects.ndjson
250+
```
251+
252+
5. **Push the Pipe to the Local Environment**
253+
Push the Tinybird pipe you want to test into your local Tinybird workspace:
254+
```bash
255+
tb push pipes/activities_filtered.pipe
256+
```
257+
258+
6. **Test the Local API Endpoint**
259+
Finally, call the pipe’s API endpoint on your local Tinybird instance to verify it works with the local data:
260+
```bash
261+
curl -s "http://localhost:7181/v0/pipes/activities_count.json" \
262+
-G \
263+
--data-urlencode "project=umbertotest5" \
264+
--data-urlencode "token=$TB_TOKEN_LOCAL"
265+
```
266+
267+
## Additional Tips
268+
269+
- If the `curl` request in step 6 returns an authentication error, double-check that you're using the correct token.
270+
- You can iterate on your pipe queries locally and rerun the endpoint call to quickly test changes.
271+
- Remember that any changes made locally (like new data or modified pipes) are not automatically reflected in the cloud workspace.

0 commit comments

Comments
 (0)