Skip to content

Commit e726335

Browse files
committed
code clean up
1 parent 8e07a18 commit e726335

8 files changed

Lines changed: 144 additions & 157 deletions

File tree

checks.yaml

Lines changed: 6 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
11
version: "1"
22
validations:
3-
# todo: support arrays of data sets [ ... ]
4-
- dataset: ch-local@nyc_taxi.trips_small
3+
- dataset: ch-local@[nyc_taxi.trips_small]
54
where: "pickup_datetime > '2014-01-01'"
65
checks:
76
- id: row_count > 0
87
description: "data is present" # optional
98
severity: error # optional (error, warn, info), default "error"
109

10+
- id: row_count between 100 and 30000
11+
description: "data is not too big"
12+
severity: error
13+
1114
- id: null_count(pickup_ntaname) == 0
1215
description: "no nulls in column" # optional
1316
severity: error
@@ -16,7 +19,6 @@ validations:
1619
description: "min check"
1720
severity: error
1821

19-
# todo: support "between X and Y"
2022
- id: stddevPop(trip_distance) < 100_000
2123
description: "check stddev value"
2224
severity: error
@@ -51,34 +53,4 @@ validations:
5153
# stddev(col) between 1 and 100_000_000
5254
# custom
5355

54-
# AI anomaly detection
55-
56-
# empties / blanks
57-
# invalid count with filter
58-
# duplicates
59-
# avg_length
60-
# cross
61-
# distribution
62-
# duplicate_count
63-
# duplicate_percent
64-
# failed rows
65-
# freshness
66-
# group by
67-
# group evolution
68-
# invalid_count
69-
# invalid_percent
70-
# max_length
71-
# min_length
72-
# missing_count
73-
# missing_percent
74-
# percentile
75-
# reconciliation
76-
# reference
77-
# schema
78-
# schema evolution
79-
# stddev_pop
80-
# stddev_samp
81-
# user-defined
82-
# variance
83-
# var_pop
84-
# var_samp
56+
# AI anomaly detection

cmd/import.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,11 @@ This command is useful for quickly onboarding data from external systems, allowi
4343
},
4444
}
4545

46-
cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource")
47-
cmd.Flags().StringVarP(&filter, "filter", "f", "", "Filter")
48-
cmd.Flags().BoolVarP(&updateCfg, "update-checks", "u", false, "Update checks config in place")
46+
cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource from which datasets will be imported")
47+
_ = cmd.MarkFlagRequired("datasource") // todo: support import from all
48+
49+
cmd.Flags().StringVarP(&filter, "filter", "f", "", "Filter applied for dataset selection")
50+
cmd.Flags().BoolVarP(&updateCfg, "update-checks", "u", false, "Update checks config file in place")
4951

5052
return cmd
5153
}

cmd/profile.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ and helps in making better decisions about data processing and analysis.
3939
}
4040

4141
for _, curDataSet := range dataSetsToProfile {
42-
metrics, err := app.ProfileDataSourceById(dataSource, curDataSet)
42+
metrics, err := app.ProfileDataset(dataSource, curDataSet)
4343
if err != nil {
4444
log.Printf("Failed to profile %s: %s\n", curDataSet, err)
4545
} else {

dbq.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ datasources:
99
password: changeme
1010
database: default
1111
datasets:
12+
- nyc_taxi.trips_big
1213
- nyc_taxi.trips_small
1314
- id: pgsql
1415
type: postgres

internal/app.go

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package internal
22

33
import (
4-
"errors"
54
"fmt"
65
"github.com/spf13/cobra"
76
"github.com/spf13/viper"
@@ -13,7 +12,7 @@ import (
1312
type DbqApp interface {
1413
PingDataSource(srcId string) (string, error)
1514
ImportDatasets(srcId string, filter string) ([]string, error)
16-
ProfileDataSourceById(srcId string, dataset string) (*TableMetrics, error)
15+
ProfileDataset(srcId string, dataset string) (*TableMetrics, error)
1716
GetDbqConfig() *DbqConfig
1817
SaveDbqConfig() error
1918
FindDataSourceById(srcId string) *DataSource
@@ -26,9 +25,9 @@ type DbqAppImpl struct {
2625
}
2726

2827
func NewDbqApp(dbqConfigPath string) DbqApp {
29-
dbqConfig := initConfig(dbqConfigPath)
28+
dbqConfig, dbqConfigUsedPath := initConfig(dbqConfigPath)
3029
return &DbqAppImpl{
31-
dbqConfigPath: dbqConfigPath,
30+
dbqConfigPath: dbqConfigUsedPath,
3231
dbqConfig: dbqConfig,
3332
}
3433
}
@@ -56,16 +55,16 @@ func (app *DbqAppImpl) ImportDatasets(srcId string, filter string) ([]string, er
5655
return []string{}, err
5756
}
5857

59-
return cnn.ImportDataSets(filter)
58+
return cnn.ImportDatasets(filter)
6059
}
6160

62-
func (app *DbqAppImpl) ProfileDataSourceById(srcId string, dataset string) (*TableMetrics, error) {
61+
func (app *DbqAppImpl) ProfileDataset(srcId string, dataset string) (*TableMetrics, error) {
6362
var dataSource = app.FindDataSourceById(srcId)
6463
cnn, err := getDbqConnector(*dataSource)
6564
if err != nil {
6665
return nil, err
6766
}
68-
return cnn.ProfileDataSet(dataset)
67+
return cnn.ProfileDataset(dataset)
6968
}
7069

7170
func (app *DbqAppImpl) GetDbqConfig() *DbqConfig {
@@ -103,7 +102,7 @@ func (app *DbqAppImpl) RunCheck(check *Check, dataSource *DataSource, dataSet st
103102
return cnn.RunCheck(check, dataSet, defaultWhere)
104103
}
105104

106-
func initConfig(dbqConfigPath string) *DbqConfig {
105+
func initConfig(dbqConfigPath string) (*DbqConfig, string) {
107106
v := viper.New()
108107

109108
if dbqConfigPath != "" {
@@ -126,7 +125,7 @@ func initConfig(dbqConfigPath string) *DbqConfig {
126125
cobra.CheckErr(err)
127126
}
128127

129-
return &dbqConfig
128+
return &dbqConfig, v.ConfigFileUsed()
130129
}
131130

132131
func getDbqConnector(ds DataSource) (DbqConnector, error) {
@@ -135,6 +134,6 @@ func getDbqConnector(ds DataSource) (DbqConnector, error) {
135134
case "clickhouse":
136135
return NewClickhouseDbqConnector(ds)
137136
default:
138-
return nil, errors.New(fmt.Sprintf("Data source type '%s' is not supported.", dsType))
137+
return nil, fmt.Errorf("data source type '%s' is not supported", dsType)
139138
}
140139
}

0 commit comments

Comments
 (0)