Skip to content

Commit 5df2ee4

Browse files
committed
simplify sql gen for checks
1 parent 8cd19d0 commit 5df2ee4

4 files changed

Lines changed: 41 additions & 39 deletions

File tree

checks.yaml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,14 @@ validations:
3131
description: "some raw query description here"
3232
severity: error
3333
query: |
34-
select countIf(trip_distance == 0) > 0 from {{table}}
34+
select countIf(trip_distance == 0) > 0 from {{table}} where 1=1
3535
3636
# - dataset: pgsql-staging@[public.table_1, public.table_2]
3737
# checks:
38-
# - id: "row count between 0 and 100"
38+
# - id: row_count > 0
3939
# severity: warn
40-
# type: row_count
41-
# params:
42-
# min: 0
43-
# max: 1000
40+
# type:
41+
4442

4543
# v1 supported functions:
4644
# ---

internal/app.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ type DbqApp interface {
1616
GetDbqConfig() *DbqConfig
1717
SaveDbqConfig() error
1818
FindDataSourceById(srcId string) *DataSource
19-
RunCheck(check *Check, dataSource *DataSource, dataSet string, defaultWhere string) (string, error)
19+
RunCheck(check *Check, dataSource *DataSource, dataset string, defaultWhere string) (string, error)
2020
}
2121

2222
type DbqAppImpl struct {
@@ -94,12 +94,12 @@ func (app *DbqAppImpl) FindDataSourceById(srcId string) *DataSource {
9494
return nil
9595
}
9696

97-
func (app *DbqAppImpl) RunCheck(check *Check, dataSource *DataSource, dataSet string, defaultWhere string) (string, error) {
97+
func (app *DbqAppImpl) RunCheck(check *Check, dataSource *DataSource, dataset string, defaultWhere string) (string, error) {
9898
cnn, err := getDbqConnector(*dataSource)
9999
if err != nil {
100100
return "", err
101101
}
102-
return cnn.RunCheck(check, dataSet, defaultWhere)
102+
return cnn.RunCheck(check, dataset, defaultWhere)
103103
}
104104

105105
func initConfig(dbqConfigPath string) (*DbqConfig, string) {

internal/clickhouse.go

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,10 @@ func (c *ClickhouseDbqConnector) RunCheck(check *Check, dataset string, defaultW
230230

231231
query, err := generateDataCheckQuery(check, dataset, defaultWhere)
232232
if err != nil {
233-
return "", fmt.Errorf("failed to generate SQL for check %s (%s): %s", check.ID, dataset, err.Error())
233+
return "", fmt.Errorf("failed to generate SQL for check (%s)/(%s): %s", check.ID, dataset, err.Error())
234234
}
235235

236-
log.Printf("Executing SQL for (%s): %s", check.ID, query)
236+
log.Printf("Executing SQL for '%s': %s", check.ID, query)
237237

238238
startTime := time.Now()
239239
rows, err := c.cnn.Query(context.Background(), query)
@@ -248,7 +248,8 @@ func (c *ClickhouseDbqConnector) RunCheck(check *Check, dataset string, defaultW
248248
if err := rows.Scan(&checkPassed); err != nil {
249249
return "", fmt.Errorf("failed to scan row: %w", err)
250250
}
251-
log.Printf("Check passed: %t (%d ms)", checkPassed, elapsed)
251+
log.Printf("Check passed: %t (in %d ms)", checkPassed, elapsed)
252+
log.Printf("---")
252253
}
253254

254255
if err = rows.Err(); err != nil {
@@ -288,18 +289,18 @@ func fetchColumns(cnn driver.Conn, ctx context.Context, databaseName string, tab
288289
return cols, nil
289290
}
290291

291-
func generateDataCheckQuery(check *Check, dataSet string, whereClause string) (string, error) {
292+
func generateDataCheckQuery(check *Check, dataset string, whereClause string) (string, error) {
292293
var sqlQuery string
293294

294295
// handle raw_query first
295296
if check.ID == CheckTypeRawQuery {
296297
if check.Query == "" {
297298
return "", fmt.Errorf("check with id 'raw_query' requires a 'query' field")
298299
}
299-
sqlQuery = strings.ReplaceAll(check.Query, "{{table}}", dataSet)
300300

301+
sqlQuery = strings.ReplaceAll(check.Query, "{{table}}", dataset)
301302
if whereClause != "" {
302-
// todo: more sophisticated check might be needed
303+
// todo: more sophisticated check is needed
303304
if strings.Contains(strings.ToLower(sqlQuery), " where ") {
304305
sqlQuery = fmt.Sprintf("%s and (%s)", sqlQuery, whereClause)
305306
} else {
@@ -312,48 +313,47 @@ func generateDataCheckQuery(check *Check, dataSet string, whereClause string) (s
312313

313314
isAggFunction := startWithAnyOf([]string{
314315
"min", "max", "avg", "stddevPop", "sum",
315-
}, check.ID)
316+
}, strings.ToLower(check.ID))
316317

317318
var checkExpression string
319+
parts := strings.Fields(check.ID)
320+
if len(parts) < 3 {
321+
return "", fmt.Errorf("invalid format for check: %s", check.ID)
322+
}
323+
318324
switch {
319325
case strings.HasPrefix(check.ID, "row_count"):
320-
// format "row_count <operator> <value>"
321-
parts := strings.Fields(check.ID)
322-
if len(parts) != 3 {
323-
return "", fmt.Errorf("invalid format for row_count check: %s", check.ID)
324-
}
325-
checkExpression = fmt.Sprintf("count() %s %s", parts[1], parts[2])
326+
checkExpression = strings.Replace(check.ID, "row_count", "count()", 1)
326327

327328
case strings.HasPrefix(check.ID, "null_count"):
328-
// format "null_count(<column_name>) <operator> <value>"
329-
re := regexp.MustCompile(`null_count\((.*?)\)\s*(==|!=|>|<|>=|<=)\s*(\d+)`)
329+
re := regexp.MustCompile(`^null_count\((.*?)\)(.*)`)
330330
matches := re.FindStringSubmatch(check.ID)
331-
if len(matches) != 4 {
331+
if len(matches) < 3 {
332332
return "", fmt.Errorf("invalid format for null_count check: %s", check.ID)
333333
}
334334

335335
column := matches[1]
336-
operator := matches[2]
337-
value := matches[3]
338-
checkExpression = fmt.Sprintf("countIf(%s IS NULL) %s %s", column, operator, value)
336+
remainder := matches[2]
337+
checkExpression = fmt.Sprintf("countIf(isNull(%s))%s", column, remainder)
339338

340339
case isAggFunction:
341-
// format: <func>(<column_name>) <operator> <value>
342-
re := regexp.MustCompile(`^(min|max|avg|stddevPop|sum)\(([^)]+)\)\s+(==|>=|<=|>|<)\s+(.*)$`)
340+
re := regexp.MustCompile(`^(min|max|avg|stddevPop|sum)\((.*?)\)(.*)`)
343341
matches := re.FindStringSubmatch(check.ID)
344-
if len(matches) < 4 {
342+
if len(matches) < 3 {
343+
fmt.Println(matches, " --- ", len(matches))
345344
return "", fmt.Errorf("invalid format for aggregation function check: %s", check.ID)
346345
}
347-
checkExpression = fmt.Sprintf("%s", matches[0])
346+
347+
checkExpression = matches[0]
348348

349349
default:
350-
// Assume the ID itself is a valid boolean expression if no specific pattern matches
351-
// This is less robust but covers simple cases.
352-
log.Printf("Warning: Check ID '%s' did not match known patterns. Assuming it's a direct SQL boolean expression.", check.ID)
350+
// assume the ID itself is a valid boolean expression if no specific pattern matches
351+
// this is less robust but covers simple cases
352+
log.Printf("Warning: Check ID '%s' did not match known check patterns. Assuming it's a direct SQL boolean expression.", check.ID)
353353
checkExpression = check.ID
354354
}
355355

356-
sqlQuery = fmt.Sprintf("select %s from %s", checkExpression, dataSet)
356+
sqlQuery = fmt.Sprintf("select %s from %s", checkExpression, dataset)
357357
if whereClause != "" {
358358
sqlQuery = fmt.Sprintf("%s where %s", sqlQuery, whereClause)
359359
}

readme.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ It is designed to be easy to use and integrate into your existing workflow.
1717
- [ ] complete clickhouse support
1818
- [x] ping
1919
- [x] import datasets
20-
- [ ] profile dataset
20+
- [x] profile dataset
2121
- [x] rows in table
2222
- [x] min, max, avg, stddev for numeric columns
2323
- [x] count of nulls and blanks
2424
- [x] most frequent value in column
25-
- [ ] JSON export
25+
- [x] JSON export
2626
- [ ] run checks
2727
- [ ] implement support for custom sql check
2828
- [ ] implement aliases for common checks based on raw sql check
@@ -58,4 +58,8 @@ docker run -d -p 18123:8123 -p19000:9000 -e CLICKHOUSE_PASSWORD=changeme --name
5858
```
5959

6060
# Supported Datasources
61-
- Clickhouse
61+
- Clickhouse
62+
63+
# dbq configuration
64+
65+
# checks configuration

0 commit comments

Comments
 (0)