Skip to content

Commit 8cd19d0

Browse files
committed
support multiple datasets in checks.yaml
1 parent e726335 commit 8cd19d0

3 files changed

Lines changed: 58 additions & 13 deletions

File tree

checks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
version: "1"
22
validations:
3-
- dataset: ch-local@[nyc_taxi.trips_small]
3+
- dataset: ch-local@[nyc_taxi.trips_small, nyc_taxi.trips_big]
44
where: "pickup_datetime > '2014-01-01'"
55
checks:
66
- id: row_count > 0

cmd/check.go

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package cmd
22

33
import (
44
"dbq/internal"
5+
"fmt"
56
"log"
67
"strings"
78

@@ -24,25 +25,31 @@ By automating these checks, you can proactively identify and address data qualit
2425

2526
checksCfg, err := internal.LoadChecksConfig(checksFile)
2627
if err != nil {
27-
log.Printf("Failed to read checks configuration: %s", err.Error())
28+
return fmt.Errorf("error while loading checks configuration file: %w", err)
2829
}
2930

30-
for i, ruleSet := range checksCfg.Validations {
31-
log.Printf("Running check for %s [%d/%d]", ruleSet.Dataset, i+1, len(checksCfg.Validations))
31+
for i, rule := range checksCfg.Validations {
32+
log.Printf("Running check for %s [%d/%d]", rule.Dataset, i+1, len(checksCfg.Validations))
3233

33-
// todo: validation
34-
parts := strings.Split(ruleSet.Dataset, "@")
35-
dataSourceId := parts[0]
36-
dataSet := parts[1] // todo: parse list
34+
dataSourceId, datasets, err := parseDatasetString(rule.Dataset)
35+
if err != nil {
36+
return fmt.Errorf("error while parsing dataset property: %w", err)
37+
}
3738

3839
dataSource := app.FindDataSourceById(dataSourceId)
40+
if dataSource == nil {
41+
return fmt.Errorf("specified data source not found in dbq configuration: %s", dataSourceId)
42+
}
3943

40-
for _, check := range ruleSet.Checks {
41-
_, err := app.RunCheck(&check, dataSource, dataSet, ruleSet.Where)
42-
if err != nil {
43-
log.Printf("Failed to run check: %s", err.Error())
44+
for dsIdx, dataset := range datasets {
45+
log.Printf(" [%d/%d] Running checks for: %s", dsIdx+1, len(datasets), dataset)
46+
for _, check := range rule.Checks {
47+
_, err := app.RunCheck(&check, dataSource, dataset, rule.Where)
48+
if err != nil {
49+
log.Printf("Failed to run check: %s", err.Error())
50+
}
51+
// todo: act on check result
4452
}
45-
// todo: act on check result
4653
}
4754
}
4855

@@ -55,3 +62,38 @@ By automating these checks, you can proactively identify and address data qualit
5562

5663
return cmd
5764
}
65+
66+
// parseDatasetString splits a dataset reference of the form
//
//	"datasource@[dataset1, dataset2, ...]"
//
// into the datasource ID and the list of dataset names.
//
// Rules enforced:
//   - the string must contain '@' separating the datasource from the list;
//   - the datasource part must be non-empty after trimming whitespace;
//   - the dataset list must be enclosed in '[' and ']';
//   - at least one non-empty dataset name must be present.
//
// Whitespace around the datasource, around the bracketed list, and around
// each individual dataset name is ignored; empty entries (e.g. a trailing
// comma) are dropped. A descriptive error is returned for any malformed
// input.
func parseDatasetString(input string) (datasource string, datasets []string, err error) {
	// Split on the first '@'; dataset names themselves must not contain '@'.
	left, right, found := strings.Cut(input, "@")
	if !found {
		return "", nil, fmt.Errorf("invalid dataset string format: %s", input)
	}

	datasource = strings.TrimSpace(left)
	if datasource == "" {
		return "", nil, fmt.Errorf("datasource part cannot be empty: %s", input)
	}

	datasetPart := strings.TrimSpace(right)
	if !strings.HasPrefix(datasetPart, "[") || !strings.HasSuffix(datasetPart, "]") {
		return "", nil, fmt.Errorf("invalid dataset format (expected '[dataset1, dataset2,...]'): %s", input)
	}

	// Slice off the surrounding '[' and ']'.
	datasetsContent := datasetPart[1 : len(datasetPart)-1]

	rawDatasets := strings.Split(datasetsContent, ",")
	datasets = make([]string, 0, len(rawDatasets))
	for _, ds := range rawDatasets {
		if cleaned := strings.TrimSpace(ds); cleaned != "" {
			datasets = append(datasets, cleaned)
		}
	}

	// Check emptiness AFTER filtering, so that degenerate inputs such as
	// "ds@[]", "ds@[ ]" and "ds@[,]" are all rejected instead of returning
	// success with zero datasets (the previous raw-content check let the
	// comma-only forms through).
	if len(datasets) == 0 {
		return "", nil, fmt.Errorf("dataset part can't be empty: %s", input)
	}

	return datasource, datasets, nil
}

readme.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,6 @@ It is designed to be easy to use and integrate into your existing workflow.
5656
```bash
5757
docker run -d -p 18123:8123 -p19000:9000 -e CLICKHOUSE_PASSWORD=changeme --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server
5858
```
59+
60+
# Supported Datasources
61+
- ClickHouse

0 commit comments

Comments
 (0)