Skip to content

Commit a02a6c9

Browse files
committed
support for expect_columns and columns_not_present
1 parent 60ab498 commit a02a6c9

13 files changed

Lines changed: 1096 additions & 0 deletions

adapters/clickhouse_adapter.go

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,39 @@ func (a *ClickhouseDbqDataSourceAdapter) InterpretDataQualityCheck(check *dbqcor
8282

8383
return sqlQuery, nil
8484
}
85+
86+
if check.SchemaCheck.ColumnsNotPresent != nil {
87+
config := check.SchemaCheck.ColumnsNotPresent
88+
89+
// Validate that at least one of columns or pattern is provided
90+
if len(config.Columns) == 0 && config.Pattern == "" {
91+
return "", fmt.Errorf("columns_not_present check requires either 'columns' list or 'pattern'")
92+
}
93+
94+
var conditions []string
95+
96+
// Add exact column name matches
97+
if len(config.Columns) > 0 {
98+
for _, col := range config.Columns {
99+
conditions = append(conditions, fmt.Sprintf("name = '%s'", col))
100+
}
101+
}
102+
103+
// Add pattern matching
104+
if config.Pattern != "" {
105+
likePattern := strings.ReplaceAll(config.Pattern, "*", "%")
106+
conditions = append(conditions, fmt.Sprintf("name LIKE '%s'", likePattern))
107+
}
108+
109+
// count of unwanted columns that exist
110+
sqlQuery := fmt.Sprintf(`select count()
111+
from system.columns
112+
where database = '%s'
113+
and table = '%s'
114+
and (%s)`, database, table, strings.Join(conditions, " or "))
115+
116+
return sqlQuery, nil
117+
}
85118
}
86119

87120
if check.ParsedCheck == nil {

adapters/clickhouse_adapter_test.go

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,89 @@ func TestClickhouseAdapter_InterpretDataQualityCheck(t *testing.T) {
296296
expectError: true,
297297
errorMessage: "dataset must be in format database.table",
298298
},
299+
{
300+
name: "columns_not_present check with column list",
301+
check: &dbqcore.DataQualityCheck{
302+
Expression: "columns_not_present",
303+
SchemaCheck: &dbqcore.SchemaCheckConfig{
304+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
305+
Columns: []string{"credit_card_number", "ssn", "password"},
306+
},
307+
},
308+
},
309+
dataset: "default.users",
310+
whereClause: "",
311+
expectedSQL: `select count()
312+
from system.columns
313+
where database = 'default'
314+
and table = 'users'
315+
and (name = 'credit_card_number' or name = 'ssn' or name = 'password')`,
316+
},
317+
{
318+
name: "columns_not_present check with pattern",
319+
check: &dbqcore.DataQualityCheck{
320+
Expression: "columns_not_present",
321+
SchemaCheck: &dbqcore.SchemaCheckConfig{
322+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
323+
Pattern: "backup_*",
324+
},
325+
},
326+
},
327+
dataset: "analytics.metrics",
328+
whereClause: "",
329+
expectedSQL: `select count()
330+
from system.columns
331+
where database = 'analytics'
332+
and table = 'metrics'
333+
and (name LIKE 'backup_%')`,
334+
},
335+
{
336+
name: "columns_not_present check with both columns and pattern",
337+
check: &dbqcore.DataQualityCheck{
338+
Expression: "columns_not_present",
339+
SchemaCheck: &dbqcore.SchemaCheckConfig{
340+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
341+
Columns: []string{"api_key", "secret"},
342+
Pattern: "token_*",
343+
},
344+
},
345+
},
346+
dataset: "api.requests",
347+
whereClause: "",
348+
expectedSQL: `select count()
349+
from system.columns
350+
where database = 'api'
351+
and table = 'requests'
352+
and (name = 'api_key' or name = 'secret' or name LIKE 'token_%')`,
353+
},
354+
{
355+
name: "columns_not_present check with neither columns nor pattern",
356+
check: &dbqcore.DataQualityCheck{
357+
Expression: "columns_not_present",
358+
SchemaCheck: &dbqcore.SchemaCheckConfig{
359+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{},
360+
},
361+
},
362+
dataset: "test.table",
363+
whereClause: "",
364+
expectError: true,
365+
errorMessage: "columns_not_present check requires either 'columns' list or 'pattern'",
366+
},
367+
{
368+
name: "columns_not_present check invalid dataset format",
369+
check: &dbqcore.DataQualityCheck{
370+
Expression: "columns_not_present",
371+
SchemaCheck: &dbqcore.SchemaCheckConfig{
372+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
373+
Pattern: "old_*",
374+
},
375+
},
376+
},
377+
dataset: "invalid_dataset",
378+
whereClause: "",
379+
expectError: true,
380+
errorMessage: "dataset must be in format database.table",
381+
},
299382
{
300383
name: "unknown function fallback",
301384
check: &dbqcore.DataQualityCheck{

adapters/mysql_adapter.go

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,39 @@ func (a *MysqlDbqDataSourceAdapter) InterpretDataQualityCheck(check *dbqcore.Dat
8282

8383
return sqlQuery, nil
8484
}
85+
86+
if check.SchemaCheck.ColumnsNotPresent != nil {
87+
config := check.SchemaCheck.ColumnsNotPresent
88+
89+
// Validate that at least one of columns or pattern is provided
90+
if len(config.Columns) == 0 && config.Pattern == "" {
91+
return "", fmt.Errorf("columns_not_present check requires either 'columns' list or 'pattern'")
92+
}
93+
94+
var conditions []string
95+
96+
// Add exact column name matches
97+
if len(config.Columns) > 0 {
98+
for _, col := range config.Columns {
99+
conditions = append(conditions, fmt.Sprintf("column_name = '%s'", col))
100+
}
101+
}
102+
103+
// Add pattern matching
104+
if config.Pattern != "" {
105+
likePattern := strings.ReplaceAll(config.Pattern, "*", "%")
106+
conditions = append(conditions, fmt.Sprintf("column_name LIKE '%s'", likePattern))
107+
}
108+
109+
// Query returns count of unwanted columns that exist
110+
sqlQuery := fmt.Sprintf(`select count(*)
111+
from information_schema.columns
112+
where table_schema = '%s'
113+
and table_name = '%s'
114+
and (%s)`, schema, table, strings.Join(conditions, " or "))
115+
116+
return sqlQuery, nil
117+
}
85118
}
86119

87120
if check.ParsedCheck == nil {

adapters/mysql_adapter_test.go

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,89 @@ func TestMySQLAdapter_InterpretDataQualityCheck(t *testing.T) {
296296
expectError: true,
297297
errorMessage: "dataset must be in format database.table",
298298
},
299+
{
300+
name: "columns_not_present check with column list",
301+
check: &dbqcore.DataQualityCheck{
302+
Expression: "columns_not_present",
303+
SchemaCheck: &dbqcore.SchemaCheckConfig{
304+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
305+
Columns: []string{"credit_card_number", "ssn", "password"},
306+
},
307+
},
308+
},
309+
dataset: "mydb.users",
310+
whereClause: "",
311+
expectedSQL: `select count(*)
312+
from information_schema.columns
313+
where table_schema = 'mydb'
314+
and table_name = 'users'
315+
and (column_name = 'credit_card_number' or column_name = 'ssn' or column_name = 'password')`,
316+
},
317+
{
318+
name: "columns_not_present check with pattern",
319+
check: &dbqcore.DataQualityCheck{
320+
Expression: "columns_not_present",
321+
SchemaCheck: &dbqcore.SchemaCheckConfig{
322+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
323+
Pattern: "temp_*",
324+
},
325+
},
326+
},
327+
dataset: "shop.products",
328+
whereClause: "",
329+
expectedSQL: `select count(*)
330+
from information_schema.columns
331+
where table_schema = 'shop'
332+
and table_name = 'products'
333+
and (column_name LIKE 'temp_%')`,
334+
},
335+
{
336+
name: "columns_not_present check with both columns and pattern",
337+
check: &dbqcore.DataQualityCheck{
338+
Expression: "columns_not_present",
339+
SchemaCheck: &dbqcore.SchemaCheckConfig{
340+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
341+
Columns: []string{"pan", "cvv"},
342+
Pattern: "card_*",
343+
},
344+
},
345+
},
346+
dataset: "ecommerce.orders",
347+
whereClause: "",
348+
expectedSQL: `select count(*)
349+
from information_schema.columns
350+
where table_schema = 'ecommerce'
351+
and table_name = 'orders'
352+
and (column_name = 'pan' or column_name = 'cvv' or column_name LIKE 'card_%')`,
353+
},
354+
{
355+
name: "columns_not_present check with neither columns nor pattern",
356+
check: &dbqcore.DataQualityCheck{
357+
Expression: "columns_not_present",
358+
SchemaCheck: &dbqcore.SchemaCheckConfig{
359+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{},
360+
},
361+
},
362+
dataset: "test.table",
363+
whereClause: "",
364+
expectError: true,
365+
errorMessage: "columns_not_present check requires either 'columns' list or 'pattern'",
366+
},
367+
{
368+
name: "columns_not_present check invalid dataset format",
369+
check: &dbqcore.DataQualityCheck{
370+
Expression: "columns_not_present",
371+
SchemaCheck: &dbqcore.SchemaCheckConfig{
372+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
373+
Pattern: "debug_*",
374+
},
375+
},
376+
},
377+
dataset: "invalid_dataset",
378+
whereClause: "",
379+
expectError: true,
380+
errorMessage: "dataset must be in format database.table",
381+
},
299382
{
300383
name: "unknown function fallback",
301384
check: &dbqcore.DataQualityCheck{

adapters/postgresql_adapter.go

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,39 @@ func (a *PostgresqlDbqDataSourceAdapter) InterpretDataQualityCheck(check *dbqcor
8282

8383
return sqlQuery, nil
8484
}
85+
86+
if check.SchemaCheck.ColumnsNotPresent != nil {
87+
config := check.SchemaCheck.ColumnsNotPresent
88+
89+
// Validate that at least one of columns or pattern is provided
90+
if len(config.Columns) == 0 && config.Pattern == "" {
91+
return "", fmt.Errorf("columns_not_present check requires either 'columns' list or 'pattern'")
92+
}
93+
94+
var conditions []string
95+
96+
// Add exact column name matches
97+
if len(config.Columns) > 0 {
98+
for _, col := range config.Columns {
99+
conditions = append(conditions, fmt.Sprintf("column_name = '%s'", col))
100+
}
101+
}
102+
103+
// Add pattern matching
104+
if config.Pattern != "" {
105+
likePattern := strings.ReplaceAll(config.Pattern, "*", "%")
106+
conditions = append(conditions, fmt.Sprintf("column_name LIKE '%s'", likePattern))
107+
}
108+
109+
// Query returns count of unwanted columns that exist
110+
sqlQuery := fmt.Sprintf(`select count(*)
111+
from information_schema.columns
112+
where table_schema = '%s'
113+
and table_name = '%s'
114+
and (%s)`, schema, table, strings.Join(conditions, " or "))
115+
116+
return sqlQuery, nil
117+
}
85118
}
86119

87120
if check.ParsedCheck == nil {

adapters/postgresql_adapter_test.go

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,89 @@ func TestPostgreSQLAdapter_InterpretDataQualityCheck(t *testing.T) {
296296
expectError: true,
297297
errorMessage: "dataset must be in format database.table",
298298
},
299+
{
300+
name: "columns_not_present check with column list",
301+
check: &dbqcore.DataQualityCheck{
302+
Expression: "columns_not_present",
303+
SchemaCheck: &dbqcore.SchemaCheckConfig{
304+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
305+
Columns: []string{"credit_card_number", "ssn", "password"},
306+
},
307+
},
308+
},
309+
dataset: "public.users",
310+
whereClause: "",
311+
expectedSQL: `select count(*)
312+
from information_schema.columns
313+
where table_schema = 'public'
314+
and table_name = 'users'
315+
and (column_name = 'credit_card_number' or column_name = 'ssn' or column_name = 'password')`,
316+
},
317+
{
318+
name: "columns_not_present check with pattern",
319+
check: &dbqcore.DataQualityCheck{
320+
Expression: "columns_not_present",
321+
SchemaCheck: &dbqcore.SchemaCheckConfig{
322+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
323+
Pattern: "pii_*",
324+
},
325+
},
326+
},
327+
dataset: "analytics.events",
328+
whereClause: "",
329+
expectedSQL: `select count(*)
330+
from information_schema.columns
331+
where table_schema = 'analytics'
332+
and table_name = 'events'
333+
and (column_name LIKE 'pii_%')`,
334+
},
335+
{
336+
name: "columns_not_present check with both columns and pattern",
337+
check: &dbqcore.DataQualityCheck{
338+
Expression: "columns_not_present",
339+
SchemaCheck: &dbqcore.SchemaCheckConfig{
340+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
341+
Columns: []string{"credit_card", "cvv"},
342+
Pattern: "sensitive_*",
343+
},
344+
},
345+
},
346+
dataset: "shop.orders",
347+
whereClause: "",
348+
expectedSQL: `select count(*)
349+
from information_schema.columns
350+
where table_schema = 'shop'
351+
and table_name = 'orders'
352+
and (column_name = 'credit_card' or column_name = 'cvv' or column_name LIKE 'sensitive_%')`,
353+
},
354+
{
355+
name: "columns_not_present check with neither columns nor pattern",
356+
check: &dbqcore.DataQualityCheck{
357+
Expression: "columns_not_present",
358+
SchemaCheck: &dbqcore.SchemaCheckConfig{
359+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{},
360+
},
361+
},
362+
dataset: "test.table",
363+
whereClause: "",
364+
expectError: true,
365+
errorMessage: "columns_not_present check requires either 'columns' list or 'pattern'",
366+
},
367+
{
368+
name: "columns_not_present check invalid dataset format",
369+
check: &dbqcore.DataQualityCheck{
370+
Expression: "columns_not_present",
371+
SchemaCheck: &dbqcore.SchemaCheckConfig{
372+
ColumnsNotPresent: &dbqcore.ColumnsNotPresentConfig{
373+
Columns: []string{"temp_col"},
374+
},
375+
},
376+
},
377+
dataset: "invalid_dataset",
378+
whereClause: "",
379+
expectError: true,
380+
errorMessage: "dataset must be in format database.table",
381+
},
299382
{
300383
name: "unknown function fallback",
301384
check: &dbqcore.DataQualityCheck{

changelog.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
# Changelog
2+
3+
## [Unreleased] - v0.5.0

0 commit comments

Comments
 (0)