Skip to content

Commit 403d433

Browse files
committed
code clean up
1 parent 346596a commit 403d433

13 files changed

Lines changed: 83 additions & 76 deletions

README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,45 @@
11
# dbqcore
22

33
DataBridge Quality Core library is a part of [dbqctl](https://github.com/DataBridgeTech/dbqctl).
4+
5+
## Available check functions:
6+
- Schema:
7+
- `expect_columns_ordered`: Validate table columns match an ordered list
8+
- `expect_columns`: Validate table has the expected columns from an unordered list
9+
- `columns_not_present`: Validate table doesn't have any columns from the list or matching a pattern
10+
- Table:
11+
- `row_count`: Count of rows in the table
12+
- `raw_query`: Custom SQL query for complex validations
13+
- Column:
14+
- `not_null`: Check for null values in a column
15+
- `freshness`: Check data recency based on timestamp column
16+
- `uniqueness`: Check for unique values in a column
17+
- `min`/`max`: Minimum and maximum values for numeric columns
18+
- `sum`: Sum of values in a column
19+
- `avg`: Average of values in a column
20+
- `stddev`: Standard deviation of values in a column
21+
22+
### Operators supported:
23+
- Comparison: `<, >, <=, >=, ==, !=`
24+
- Range: `between X and Y`
25+
- Function-only checks (like `not_null`, `uniqueness`)
26+
27+
## Changelog
28+
29+
### v0.5.0
30+
31+
#### Added
32+
- **Schema Validation**: New `schema_checks` support for validating database table schemas
33+
- `expect_columns` check to validate required column presence and types
34+
- `expect_columns_ordered` check to validate required column presence and types in specific order
35+
- `columns_not_present` check to ensure specific columns are not present by stop-list or pattern
36+
- **Enhanced Check Configuration**: New flexible checks format with improved YAML configuration
37+
- **Database Adapter Architecture**: Refactored to use adapter pattern for better database abstraction
38+
- **Comprehensive Test Coverage**: Added extensive test suites for all adapters and validation logic
39+
- **CI/CD Pipeline**: GitHub Actions workflow for automated testing
40+
41+
#### Improved
42+
- **Performance**: Enhanced query execution with optimized adapter interfaces
43+
- **Configuration**: More flexible check expression parsing and validation
44+
- **Error Handling**: Better validation and error reporting for check results
45+
- **Code Quality**: Comprehensive refactoring with improved maintainability

changelog.md

Lines changed: 0 additions & 3 deletions
This file was deleted.

check_parser.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,10 @@ var (
5858
"stddev": true,
5959
}
6060

61-
// reserved for schema checks
6261
schemaScopeFunctions = map[string]bool{
63-
// Schema checks are now handled via schema_check config format
62+
"expect_columns": true,
63+
"expect_columns_ordered": true,
64+
"columns_not_present": true,
6465
}
6566
)
6667

check_parser_test.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,10 +409,25 @@ func TestInferScope(t *testing.T) {
409409
expected: ScopeColumn,
410410
},
411411
{
412-
name: "schema scope function",
412+
name: "table scope function - raw_query",
413413
functionName: "raw_query",
414414
expected: ScopeTable,
415415
},
416+
{
417+
name: "schema scope function - expect_columns",
418+
functionName: "expect_columns",
419+
expected: ScopeSchema,
420+
},
421+
{
422+
name: "schema scope function - expect_columns_ordered",
423+
functionName: "expect_columns_ordered",
424+
expected: ScopeSchema,
425+
},
426+
{
427+
name: "schema scope function - columns_not_present",
428+
functionName: "columns_not_present",
429+
expected: ScopeSchema,
430+
},
416431
{
417432
name: "unknown function defaults to column",
418433
functionName: "unknown_func",

checks-sample.yml

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,21 @@
66
# - column: Operations on specific columns (not_null, uniqueness, freshness, min, max, etc.)
77
#
88
# Available check functions:
9+
# Schema:
10+
# - expect_columns_ordered: Validate table columns match an ordered list
11+
# - expect_columns: Validate table has the expected columns from an unordered list
12+
# - columns_not_present: Validate table doesn't have any columns from the list or matching a pattern
13+
# Table:
914
# - row_count: Count of rows in the table
15+
# - raw_query: Custom SQL query for complex validations
16+
# Column:
1017
# - not_null: Check for null values in a column
1118
# - freshness: Check data recency based on timestamp column
1219
# - uniqueness: Check for unique values in a column
1320
# - min/max: Minimum and maximum values for numeric columns
1421
# - sum: Sum of values in a column
1522
# - avg: Average of values in a column
1623
# - stddev: Standard deviation of values in a column
17-
# - avgWeighted: Weighted average with custom weight parameter
18-
# - expect_columns_ordered: Validate table columns match an ordered list
19-
# - expect_columns: Validate table has one of columns from unordered list
20-
# - columns_not_present: Validate table doesn't have any columns from the list or matching pattern
21-
# - raw_query: Custom SQL query for complex validations
2224
#
2325
# Operators supported:
2426
# - Comparison: <, >, <=, >=, ==, !=
@@ -91,10 +93,6 @@ rules:
9193
- sum(fare_amount) between 10000 and 10000000:
9294
desc: "Total fare amount should be within expected range"
9395

94-
# example of custom weighted average
95-
- avgWeighted(fare_amount, trip_distance) between 2.0 and 15.0:
96-
desc: "Weighted average fare per distance should be reasonable"
97-
9896
# custom validation with raw query
9997
- raw_query:
10098
desc: "Check for trips with zero distance but positive fare"

checks_cfg.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func (c *DataQualityCheck) UnmarshalYAML(node *yaml.Node) error {
7373
key := node.Content[0].Value
7474
value := node.Content[1]
7575

76-
if key == "schema_check" {
76+
if key == CheckTypeSchemaCheck {
7777
// Handle schema_check format
7878
type tempCheck struct {
7979
SchemaCheck *SchemaCheckConfig `yaml:"schema_check"`

checks_cfg_schema_test.go

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -128,27 +128,6 @@ schema_check:
128128
// Validate ParsedCheck
129129
if tt.expectedCheck.ParsedCheck != nil {
130130
t.Errorf("ParsedCheck is non-nil, expected nil for schema checks")
131-
132-
//if check.ParsedCheck == nil {
133-
// t.Errorf("ParsedCheck is nil, expected non-nil")
134-
//} else {
135-
// if check.ParsedCheck.FunctionName != tt.expectedCheck.ParsedCheck.FunctionName {
136-
// t.Errorf("ParsedCheck.FunctionName mismatch: got %s, want %s",
137-
// check.ParsedCheck.FunctionName, tt.expectedCheck.ParsedCheck.FunctionName)
138-
// }
139-
// if check.ParsedCheck.Scope != tt.expectedCheck.ParsedCheck.Scope {
140-
// t.Errorf("ParsedCheck.Scope mismatch: got %s, want %s",
141-
// check.ParsedCheck.Scope, tt.expectedCheck.ParsedCheck.Scope)
142-
// }
143-
// if check.ParsedCheck.Operator != tt.expectedCheck.ParsedCheck.Operator {
144-
// t.Errorf("ParsedCheck.Operator mismatch: got %s, want %s",
145-
// check.ParsedCheck.Operator, tt.expectedCheck.ParsedCheck.Operator)
146-
// }
147-
// if check.ParsedCheck.ThresholdValue != tt.expectedCheck.ParsedCheck.ThresholdValue {
148-
// t.Errorf("ParsedCheck.ThresholdValue mismatch: got %v, want %v",
149-
// check.ParsedCheck.ThresholdValue, tt.expectedCheck.ParsedCheck.ThresholdValue)
150-
// }
151-
//}
152131
}
153132
})
154133
}
@@ -217,15 +196,4 @@ rules:
217196
if schemaCheck.ParsedCheck != nil {
218197
t.Errorf("ParsedCheck should not be present")
219198
}
220-
221-
//else {
222-
// if schemaCheck.ParsedCheck.FunctionName != "expect_columns_ordered" {
223-
// t.Errorf("ParsedCheck FunctionName mismatch: got %s",
224-
// schemaCheck.ParsedCheck.FunctionName)
225-
// }
226-
// if schemaCheck.ParsedCheck.Scope != ScopeSchema {
227-
// t.Errorf("ParsedCheck Scope mismatch: got %s",
228-
// schemaCheck.ParsedCheck.Scope)
229-
// }
230-
//}
231199
}

checks_cfg_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package dbqcore
22

33
import (
4-
"io/ioutil"
54
"os"
65
"reflect"
76
"testing"
@@ -280,7 +279,7 @@ rules:
280279

281280
for _, tt := range tests {
282281
t.Run(tt.name, func(t *testing.T) {
283-
tmpFile, err := ioutil.TempFile("", "test-config-*.yml")
282+
tmpFile, err := os.CreateTemp("", "dbqcore-test-config-*.yml")
284283
if err != nil {
285284
t.Fatalf("Failed to create temp file: %v", err)
286285
}

connectors/mysql_connector.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,17 @@ func (c *MysqlDbqConnector) Ping(ctx context.Context) (string, error) {
4242

4343
func (c *MysqlDbqConnector) ImportDatasets(ctx context.Context, filter string) ([]string, error) {
4444
query := `
45-
SELECT table_schema, table_name
46-
FROM information_schema.tables
47-
WHERE table_schema NOT IN ('mysql', 'information_schema', 'performance_schema', 'sys')
45+
select table_schema, table_name
46+
from information_schema.tables
47+
where table_schema not in ('mysql', 'information_schema', 'performance_schema', 'sys')
4848
`
4949

5050
var args []interface{}
5151
if filter != "" {
52-
query += " AND (table_schema LIKE ? OR table_name LIKE ?)"
52+
query += " and (table_schema like ? or table_name like ?)"
5353
args = append(args, fmt.Sprintf("%%%s%%", filter), fmt.Sprintf("%%%s%%", filter))
5454
}
55-
query += " ORDER BY table_schema, table_name"
55+
query += " order by table_schema, table_name"
5656

5757
rows, err := c.db.QueryContext(ctx, query, args...)
5858
if err != nil {

connectors/postgresql_connector.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,17 @@ func (c *PostgresqlDbqConnector) Ping(ctx context.Context) (string, error) {
4343

4444
func (c *PostgresqlDbqConnector) ImportDatasets(ctx context.Context, filter string) ([]string, error) {
4545
query := `
46-
SELECT table_schema, table_name
47-
FROM information_schema.tables
48-
WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
46+
select table_schema, table_name
47+
from information_schema.tables
48+
where table_schema not in ('pg_catalog', 'information_schema')
4949
`
5050

5151
var args []interface{}
5252
if filter != "" {
53-
query += " AND (table_schema LIKE $1 OR table_name LIKE $1)"
53+
query += " and (table_schema like $1 or table_name like $1)"
5454
args = append(args, fmt.Sprintf("%%%s%%", filter))
5555
}
56-
query += " ORDER BY table_schema, table_name"
56+
query += " order by table_schema, table_name"
5757

5858
rows, err := c.db.QueryContext(ctx, query, args...)
5959
if err != nil {

0 commit comments

Comments
 (0)