Skip to content

Commit 0f25083

Browse files
authored
Feature/real time sink patterns (#120)
* Update version to 0.18.0, add new execution strategies including duration-based and ramp load patterns, enhance validation capabilities, and improve performance metrics collection. Introduce new test plans and examples for various execution strategies, along with updates to documentation and Docker configurations. * feat(foreign-key): implement strategies and utilities for foreign keys - Add CardinalityConfigBuilder and NullabilityConfigBuilder for configuring cardinality and nullability in relationships - Introduce new processors for foreign key uniqueness and connection resolution - Add comprehensive tests for new features and update existing test cases - Enhance documentation with new YAML examples and integration tests for execution strategies, including performance test examples This implementation enables flexible foreign key management in data generation, supporting various relationship patterns and improving data integrity validation. * Fix cardinality and nullability config to be at foreign key target level and not top level of foreign keys * fix: revert back plan models changes and unified yaml, cleanup imports * feat: enhance CI/CD workflows for multi-architecture builds - Updated the build workflow to support both amd64 and arm64 architectures for packaging the application as a Debian package. - Introduced a new workflow for testing Linux ARM64 builds, including setup for QEMU and Docker Buildx for cross-platform compatibility. - Adjusted artifact naming conventions to clearly indicate architecture in the output files. * fix: update CI/CD workflows to install fakeroot before jpackage - Modified the build and test workflows to include the installation of fakeroot prior to executing the jpackage command, ensuring successful packaging of the application for multi-architecture builds. 
* fix: remove OS specific installation, fix integration test, update quick start * Enhance foreign key strategies with deterministic behavior using hash-based approaches - Updated ForeignKeyEndToEndIntegrationTest to assert expected null rows for foreign keys based on deterministic hash values. - Modified CardinalityStrategy, DistributedSamplingStrategy, GenerationModeStrategy, and NullabilityStrategy to utilize hash-based methods for consistent results across different Spark environments. - Introduced SimplePercentileCalculator for efficient percentile calculations in performance metrics, replacing the deprecated T-Digest. - Added tests for new deterministic behaviors in foreign key strategies and updated existing tests for consistency. - Improved DataGenerator to support deterministic SQL generation with seed-based hash functions. * Refactor YAML plan and task handling, enhance foreign key processing - Removed obsolete integration test steps from GitHub Actions workflow. - Improved logging in StepDataCoordinator for better debugging during record generation. - Updated CardinalityCountAdjustmentProcessor to ensure only foreign key target steps are modified, preventing unintended changes. - Added new YAML plan and task files for account balances and transactions, including validation of foreign key relationships. - Introduced integration tests for YAML plan execution to verify record counts and foreign key integrity.
1 parent 27594a8 commit 0f25083

194 files changed

Lines changed: 17432 additions & 3724 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.github/workflows/build.yml

Lines changed: 0 additions & 218 deletions
Original file line number | Diff line number | Diff line change
@@ -52,221 +52,3 @@ jobs:
5252
ORG_GRADLE_PROJECT_signingKey: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGKEY }}
5353
ORG_GRADLE_PROJECT_signingKeyId: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGKEYID }}
5454
ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGPASSWORD }}
55-
- name: Upload jpackage jar
56-
uses: actions/upload-artifact@v4
57-
with:
58-
name: jars
59-
path: "app/build/libs/data-caterer.jar"
60-
overwrite: true
61-
62-
osx:
63-
needs: build
64-
strategy:
65-
matrix:
66-
include:
67-
- runner: macos-13 # Intel x64
68-
arch: x64
69-
arch_name: x86_64
70-
- runner: macos-14 # Apple Silicon arm64
71-
arch: aarch64
72-
arch_name: aarch64
73-
runs-on: ${{ matrix.runner }}
74-
75-
steps:
76-
- uses: actions/checkout@v4
77-
with:
78-
fetch-depth: 2
79-
- name: Set version
80-
run: |
81-
BASE_VERSION=$(grep version gradle.properties | cut -d= -f2)
82-
COMMIT_HASH=$(git rev-parse --short HEAD)
83-
84-
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
85-
APP_VERSION="$BASE_VERSION"
86-
else
87-
APP_VERSION="$BASE_VERSION-$COMMIT_HASH"
88-
fi
89-
90-
echo "APP_VERSION=$APP_VERSION" >> $GITHUB_ENV
91-
echo "Building version: $APP_VERSION"
92-
- uses: actions/setup-java@v4
93-
with:
94-
java-version: '21'
95-
java-package: jdk
96-
architecture: ${{ matrix.arch }}
97-
distribution: oracle
98-
- name: Download fat jar
99-
uses: actions/download-artifact@v4
100-
with:
101-
name: jars
102-
path: app/build/libs/
103-
- name: Package jar as dmg installer
104-
run: 'jpackage --main-jar data-caterer.jar "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-mac.cfg"'
105-
- name: Rename DMG with version and architecture
106-
run: mv DataCaterer-*.dmg DataCaterer-${{ env.APP_VERSION }}-macos-${{ matrix.arch_name }}.dmg
107-
- name: Upload dmg
108-
uses: actions/upload-artifact@v4
109-
with:
110-
name: data-caterer-macos-${{ matrix.arch_name }}
111-
path: "DataCaterer-${{ env.APP_VERSION }}-macos-${{ matrix.arch_name }}.dmg"
112-
overwrite: true
113-
114-
windows:
115-
needs: build
116-
runs-on: [windows-latest]
117-
118-
steps:
119-
- uses: actions/checkout@v4
120-
with:
121-
fetch-depth: 2
122-
- name: Set version
123-
run: |
124-
$BASE_VERSION = (Get-Content gradle.properties | Select-String '^version=' | ForEach-Object { $_ -replace 'version=','' }).Trim()
125-
$COMMIT_HASH = git rev-parse --short HEAD
126-
127-
if ("${{ github.ref }}" -eq "refs/heads/main") {
128-
$APP_VERSION = $BASE_VERSION
129-
} else {
130-
$APP_VERSION = "$BASE_VERSION-$COMMIT_HASH"
131-
}
132-
133-
echo "APP_VERSION=$APP_VERSION" >> $env:GITHUB_ENV
134-
Write-Output "Building version: $APP_VERSION"
135-
- uses: actions/setup-java@v4
136-
with:
137-
java-version: '21'
138-
java-package: jdk
139-
architecture: x64
140-
distribution: oracle
141-
- name: Download fat jar
142-
uses: actions/download-artifact@v4
143-
with:
144-
name: jars
145-
path: app/build/libs/
146-
- name: Package jar as exe
147-
run: 'jpackage --main-jar data-caterer.jar "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-windows.cfg"'
148-
- name: Rename EXE with version and architecture
149-
run: mv DataCaterer-*.exe DataCaterer-$env:APP_VERSION-windows-x86_64.exe
150-
- name: Upload installer
151-
uses: actions/upload-artifact@v4
152-
with:
153-
name: data-caterer-windows-x86_64
154-
path: "DataCaterer-${{ env.APP_VERSION }}-windows-x86_64.exe"
155-
overwrite: true
156-
157-
linux-amd64:
158-
needs: build
159-
runs-on: [ubuntu-latest]
160-
161-
steps:
162-
- uses: actions/checkout@v4
163-
with:
164-
fetch-depth: 2
165-
- name: Set version
166-
run: |
167-
BASE_VERSION=$(grep version gradle.properties | cut -d= -f2)
168-
COMMIT_HASH=$(git rev-parse --short HEAD)
169-
170-
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
171-
APP_VERSION="$BASE_VERSION"
172-
else
173-
APP_VERSION="$BASE_VERSION-$COMMIT_HASH"
174-
fi
175-
176-
echo "APP_VERSION=$APP_VERSION" >> $GITHUB_ENV
177-
echo "Building version: $APP_VERSION"
178-
- uses: actions/setup-java@v4
179-
with:
180-
java-version: '21'
181-
java-package: jdk
182-
architecture: ${{ matrix.arch }}
183-
distribution: oracle
184-
- name: Download fat jar
185-
uses: actions/download-artifact@v4
186-
with:
187-
name: jars
188-
path: app/build/libs/
189-
- name: Package jar as debian package (amd64)
190-
run: 'jpackage --main-jar data-caterer.jar "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-linux.cfg"'
191-
- name: Rename deb with version
192-
run: |
193-
DEB_FILE=$(ls datacaterer_*_amd64.deb 2>/dev/null | head -n 1)
194-
if [ -n "$DEB_FILE" ]; then
195-
echo "Found deb file: $DEB_FILE"
196-
mv "$DEB_FILE" datacaterer_${{ env.APP_VERSION }}_amd64.deb
197-
echo "Renamed to: datacaterer_${{ env.APP_VERSION }}_amd64.deb"
198-
else
199-
echo "No deb file found"
200-
echo "Current directory:"
201-
pwd
202-
echo "Files in current directory:"
203-
ls -lart
204-
exit 1
205-
fi
206-
- name: Upload deb (amd64)
207-
uses: actions/upload-artifact@v4
208-
with:
209-
name: data-caterer-linux-amd64
210-
path: "datacaterer_${{ env.APP_VERSION }}_amd64.deb"
211-
overwrite: true
212-
213-
linux-arm64:
214-
needs: build
215-
runs-on: [ubuntu-latest]
216-
217-
steps:
218-
- uses: actions/checkout@v4
219-
with:
220-
fetch-depth: 2
221-
- name: Set version
222-
run: |
223-
BASE_VERSION=$(grep version gradle.properties | cut -d= -f2)
224-
COMMIT_HASH=$(git rev-parse --short HEAD)
225-
226-
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
227-
APP_VERSION="$BASE_VERSION"
228-
else
229-
APP_VERSION="$BASE_VERSION-$COMMIT_HASH"
230-
fi
231-
232-
echo "APP_VERSION=$APP_VERSION" >> $GITHUB_ENV
233-
echo "Building version: $APP_VERSION"
234-
- name: Set up QEMU
235-
uses: docker/setup-qemu-action@v3
236-
with:
237-
platforms: arm64
238-
- name: Set up Docker Buildx
239-
uses: docker/setup-buildx-action@v3
240-
- name: Download fat jar
241-
uses: actions/download-artifact@v4
242-
with:
243-
name: jars
244-
path: app/build/libs/
245-
- name: Package jar as debian package (arm64)
246-
run: |
247-
docker run --rm --platform linux/arm64 \
248-
-v $(pwd):/workspace \
249-
-w /workspace \
250-
arm64v8/eclipse-temurin:21-jdk \
251-
bash -c "apt-get update && apt-get install -y fakeroot && jpackage --main-jar data-caterer.jar '@misc/jpackage/jpackage.cfg' '@misc/jpackage/jpackage-linux.cfg'"
252-
- name: Rename output to indicate version and architecture (arm64)
253-
run: |
254-
DEB_FILE=$(ls datacaterer_*_arm64.deb 2>/dev/null | head -n 1)
255-
if [ -n "$DEB_FILE" ]; then
256-
echo "Found deb file: $DEB_FILE"
257-
mv "$DEB_FILE" datacaterer_${{ env.APP_VERSION }}_arm64.deb
258-
echo "Renamed to: datacaterer_${{ env.APP_VERSION }}_arm64.deb"
259-
else
260-
echo "No deb file found"
261-
echo "Current directory:"
262-
pwd
263-
echo "Files in current directory:"
264-
ls -lart
265-
exit 1
266-
fi
267-
- name: Upload deb (arm64)
268-
uses: actions/upload-artifact@v4
269-
with:
270-
name: data-caterer-linux-arm64
271-
path: "datacaterer_${{ env.APP_VERSION }}_arm64.deb"
272-
overwrite: true

.github/workflows/check.yml

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -23,17 +23,3 @@ jobs:
2323
- name: Run gradle integration tests
2424
run: |
2525
./gradlew :app:integrationTest --info
26-
- name: Run intsa-integration tests
27-
id: tests
28-
uses: data-catering/insta-integration@v4
29-
- name: Print results
30-
run: |
31-
echo "Records generated: ${{ steps.tests.outputs.num_records_generated }}"
32-
echo "Successful validations: ${{ steps.tests.outputs.num_success_validations }}"
33-
echo "Failed validations: ${{ steps.tests.outputs.num_failed_validations }}"
34-
echo "Number of validations: ${{ steps.tests.outputs.num_validations }}"
35-
echo "Validation success rate: ${{ steps.tests.outputs.validation_success_rate }}"
36-
37-
if [ "${{ steps.tests.outputs.num_failed_validations }}" -gt 0 ]; then
38-
exit 1
39-
fi

README.md

Lines changed: 33 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -34,35 +34,39 @@ and deep dive into issues [from the generated report](https://data.catering/late
3434

3535
![Basic flow](misc/design/basic_data_caterer_flow_medium.gif)
3636

37-
## Quick start
38-
39-
1. Docker
40-
```shell
41-
docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer:0.17.3
42-
```
43-
[Open localhost:9898](http://localhost:9898).
44-
1. [Run Scala/Java examples](#run-scalajava-examples)
45-
```shell
46-
git clone git@github.com:data-catering/data-caterer-example.git
47-
cd data-caterer-example && ./run.sh
48-
#check results under docker/sample/report/index.html folder
49-
```
50-
1. UI App Downloads (Nightly builds from `main` branch)
51-
- **macOS**:
52-
- [Intel (x86_64)](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-macos-x86_64.zip)
53-
- [Apple Silicon (M1/M2/M3)](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-macos-aarch64.zip)
54-
- **Windows**:
55-
- [x64](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-windows-x86_64.zip)
56-
1. After downloading, go to 'Downloads' folder and 'Extract All' from data-caterer-windows-x86_64
57-
1. Double-click the installer to install Data Caterer
58-
1. Click on 'More info' then at the bottom, click 'Run anyway'
59-
1. Go to '/Program Files/DataCaterer' folder and run DataCaterer application
60-
1. If your browser doesn't open, go to [http://localhost:9898](http://localhost:9898) in your preferred browser
61-
- **Linux**:
62-
- [amd64](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-linux-amd64.zip)
63-
- [arm64](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-linux-arm64.zip)
64-
65-
[Follow quick start instructions from here if you want more details](https://data.catering/latest/get-started/quick-start/).
37+
## Quick Start
38+
39+
### Java/Scala API (Recommended)
40+
41+
```shell
42+
git clone git@github.com:data-catering/data-caterer.git
43+
cd data-caterer/example
44+
./run.sh
45+
```
46+
47+
It will run the [`DocumentationPlanRun`](example/src/main/scala/io/github/datacatering/plan/DocumentationPlanRun.scala) class.
48+
Press Enter to run the default example. Check results at `docker/sample/report/index.html`.
49+
50+
### YAML
51+
52+
```shell
53+
git clone git@github.com:data-catering/data-caterer.git
54+
cd data-caterer/example
55+
./run.sh csv.yaml
56+
```
57+
58+
It will run the [`csv.yaml`](example/docker/data/custom/plan/csv.yaml) plan file and the [`csv_transaction_file`](example/docker/data/custom/task/file/csv/csv_transaction_file.yaml) task file.
59+
Check results at `docker/data/custom/report/index.html`.
60+
61+
### UI
62+
63+
```shell
64+
docker run -d -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer:0.18.0
65+
```
66+
67+
Open [http://localhost:9898](http://localhost:9898).
68+
69+
[**Full quick start guide**](https://data.catering/latest/get-started/quick-start/)
6670

6771

6872
## Integrations
@@ -123,12 +127,6 @@ Different ways to run Data Caterer based on your use case:
123127

124128
[Check here for the full list of roadmap items.](https://data.catering/latest/use-case/roadmap/)
125129

126-
## Pricing
127-
128-
Data Caterer is set up under a usage pricing model for the latest application version. There are different pricing tiers based on how much you use Data Caterer. This also includes support and requesting features. The current open-source version will be kept for those who want to continue using the open-source version.
129-
130-
[Find out more details here.](https://data.catering/latest/pricing/)
131-
132130
### Mildly Quick Start
133131

134132
#### Generate and validate data

api/src/main/java/io/github/datacatering/datacaterer/javaapi/api/PlanRun.java

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ public ForeignKeyRelation foreignField(String dataSource, String step, String fi
240240
* @return A ForeignKeyRelation instance.
241241
*/
242242
public ForeignKeyRelation foreignField(String dataSource, String step, List<String> fields) {
243-
return new ForeignKeyRelation(dataSource, step, toScalaList(fields));
243+
return new ForeignKeyRelation(dataSource, step, toScalaList(fields), scala.Option.empty(), scala.Option.empty(), scala.Option.empty());
244244
}
245245

246246
/**
@@ -255,7 +255,8 @@ public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBu
255255
return new ForeignKeyRelation(
256256
connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(),
257257
connectionTaskBuilder.getStep().step().name(),
258-
toScalaList(List.of(field))
258+
toScalaList(List.of(field)),
259+
scala.Option.empty(), scala.Option.empty(), scala.Option.empty()
259260
);
260261
}
261262

@@ -271,7 +272,8 @@ public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBu
271272
return new ForeignKeyRelation(
272273
connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(),
273274
connectionTaskBuilder.getStep().step().name(),
274-
toScalaList(fields)
275+
toScalaList(fields),
276+
scala.Option.empty(), scala.Option.empty(), scala.Option.empty()
275277
);
276278
}
277279

@@ -284,7 +286,7 @@ public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBu
284286
* @return A ForeignKeyRelation instance.
285287
*/
286288
public ForeignKeyRelation foreignField(ConnectionTaskBuilder<?> connectionTaskBuilder, String step, List<String> fields) {
287-
return new ForeignKeyRelation(connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(), step, toScalaList(fields));
289+
return new ForeignKeyRelation(connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(), step, toScalaList(fields), scala.Option.empty(), scala.Option.empty(), scala.Option.empty());
288290
}
289291

290292
/**

api/src/main/scala/io/github/datacatering/datacaterer/api/DataCatererConfigurationBuilder.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ package io.github.datacatering.datacaterer.api
33
import com.softwaremill.quicklens.ModifyPimp
44
import io.github.datacatering.datacaterer.api.connection.{BigQueryBuilder, CassandraBuilder, ConnectionTaskBuilder, FileBuilder, HttpBuilder, KafkaBuilder, MySqlBuilder, NoopBuilder, PostgresBuilder, RabbitmqBuilder, SolaceBuilder}
55
import io.github.datacatering.datacaterer.api.converter.Converters.toScalaMap
6-
import io.github.datacatering.datacaterer.api.model.Constants.{BIGQUERY_WRITE_METHOD, _}
6+
import io.github.datacatering.datacaterer.api.model.Constants._
77
import io.github.datacatering.datacaterer.api.model.DataCatererConfiguration
88

99
import scala.annotation.varargs

0 commit comments

Comments
 (0)