
Commit e633103

code clean up

Parent: 5df2ee4
7 files changed: 42 additions & 33 deletions


cmd/check.go

Lines changed: 2 additions & 1 deletion
@@ -49,6 +49,7 @@ By automating these checks, you can proactively identify and address data qualit
 							log.Printf("Failed to run check: %s", err.Error())
 						}
 						// todo: act on check result
+						// if check.Severity {...}
 					}
 				}
 			}
@@ -57,7 +58,7 @@ By automating these checks, you can proactively identify and address data qualit
 		},
 	}
 
-	cmd.Flags().StringVarP(&checksFile, "checks", "c", "", "Validation checks")
+	cmd.Flags().StringVarP(&checksFile, "checks", "c", "", "Path to data quality checks file")
 	_ = cmd.MarkFlagRequired("checks")
 
 	return cmd
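
The new todo hints at severity-aware handling of check results. A minimal, runnable sketch of what "act on check result" could look like; the CheckResult type and its fields are assumptions for illustration, not code from this repository:

package main

import (
	"log"
	"os"
)

// CheckResult is a hypothetical stand-in for the project's real check result type.
type CheckResult struct {
	Name     string
	Severity string // e.g. "warn" or "error"
	Passed   bool
}

// actOnResult sketches the "act on check result" todo: failures with error
// severity abort the run with a non-zero exit code, anything else only logs.
func actOnResult(r CheckResult) {
	if r.Passed {
		return
	}
	log.Printf("check %q failed (severity=%s)", r.Name, r.Severity)
	if r.Severity == "error" {
		os.Exit(1)
	}
}

func main() {
	actOnResult(CheckResult{Name: "row_count_above_zero", Severity: "warn", Passed: false})
}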

cmd/import.go

Lines changed: 24 additions & 13 deletions
@@ -20,32 +20,43 @@ func NewImportCommand(app internal.DbqApp) *cobra.Command {
 This command is useful for quickly onboarding data from external systems, allowing you to easily access and work with already existing data.
 `,
 		RunE: func(cmd *cobra.Command, args []string) error {
-			datasets, err := app.ImportDatasets(dataSource, filter)
-			if err != nil {
-				log.Println("Failed to fetch datasets: " + err.Error())
-				return nil
+			var importFromSources []string
+			if dataSource != "" {
+				importFromSources = append(importFromSources, dataSource)
+			} else {
+				for _, ds := range app.GetDbqConfig().DataSources {
+					importFromSources = append(importFromSources, ds.ID)
+				}
 			}
 
-			log.Printf("Found %d datasets to import: %v\n", len(datasets), datasets)
-			if updateCfg {
+			for _, curDataSource := range importFromSources {
+				datasets, err := app.ImportDatasets(curDataSource, filter)
+				if err != nil {
+					log.Println("Failed to fetch datasets: " + err.Error())
+					return nil
+				}
+
+				log.Printf("Found %d datasets in %s to import: %v\n", len(datasets), curDataSource, datasets)
+
 				ds := app.FindDataSourceById(dataSource)
 				if ds != nil {
 					ds.Datasets = datasets
-					err := app.SaveDbqConfig()
-					if err != nil {
-						return err
-					}
-					log.Println("dbq config has been updated")
 				}
 			}
 
+			if updateCfg {
+				err := app.SaveDbqConfig()
+				if err != nil {
+					return err
+				}
+				log.Println("dbq config has been updated")
+			}
+
 			return nil
 		},
 	}
 
 	cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource from which datasets will be imported")
-	_ = cmd.MarkFlagRequired("datasource") // todo: support import from all
-
 	cmd.Flags().StringVarP(&filter, "filter", "f", "", "Filter applied for dataset selection")
 	cmd.Flags().BoolVarP(&updateCfg, "update-checks", "u", false, "Update checks config file in place")
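
With this change the --datasource flag becomes optional: when it is omitted, the command fans out over every data source defined in the dbq config, which is why MarkFlagRequired("datasource") was dropped. The source-selection logic, isolated as a runnable sketch with illustrative names rather than the repository's API:

package main

import "fmt"

// resolveSources mirrors the selection introduced above: use the explicitly
// requested data source if one was given, otherwise fall back to every
// configured source ID.
func resolveSources(requested string, configuredIDs []string) []string {
	if requested != "" {
		return []string{requested}
	}
	return configuredIDs
}

func main() {
	configured := []string{"clickhouse_prod", "clickhouse_stage"}
	fmt.Println(resolveSources("", configured))                 // [clickhouse_prod clickhouse_stage]
	fmt.Println(resolveSources("clickhouse_stage", configured)) // [clickhouse_stage]
}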

cmd/ping.go

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ This is useful for quickly determining if the data source is online and respondi
 		},
 	}
 
-	cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource")
+	cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource to ping")
 
 	return cmd
 }

cmd/profile.go

Lines changed: 3 additions & 4 deletions
@@ -47,22 +47,21 @@ and helps in making better decisions about data processing and analysis.
 				}
 			}
 
+			// todo: introduce output format flag
 			jsonData, err := json.Marshal(profileResults)
 			if err != nil {
 				log.Fatalf("Failed to marshal metrics to JSON: %v", err)
 			}
-
-			// todo: handle empty tables
 			log.Println(string(jsonData))
 
 			return nil
 		},
 	}
 
-	cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource")
+	cmd.Flags().StringVarP(&dataSource, "datasource", "d", "", "Datasource in which datasets will be profiled")
 	_ = cmd.MarkFlagRequired("datasource")
 
-	cmd.Flags().StringVarP(&dataSet, "dataset", "s", "", "Dataset")
+	cmd.Flags().StringVarP(&dataSet, "dataset", "s", "", "Dataset within specified data source")
 
 	return cmd
 }
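
The relocated todo asks for an output format flag instead of always printing JSON. A small sketch of how the rendering side of that could look; the format names and the shape of the profile results are assumptions, not part of this commit:

package main

import (
	"encoding/json"
	"fmt"
	"log"
)

// render serializes profiling results according to a user-selected format,
// as the "introduce output format flag" todo suggests.
func render(profileResults map[string]any, format string) (string, error) {
	switch format {
	case "json":
		b, err := json.MarshalIndent(profileResults, "", "  ")
		return string(b), err
	case "text":
		return fmt.Sprintf("%v", profileResults), nil
	default:
		return "", fmt.Errorf("unsupported output format: %q", format)
	}
}

func main() {
	out, err := render(map[string]any{"total_rows": 42, "null_emails": 3}, "json")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out)
}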

cmd/root.go

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ func AddCommands(app internal.DbqApp) {
 }
 
 func init() {
-	// todo: workaround for bootstrap config flag & unsupported flag issue
+	// workaround for bootstrap config flag & unsupported flag issue
 	var dbqConfigFile string
 	rootCmd.PersistentFlags().StringVar(&dbqConfigFile, "config", "", "config file (default is $HOME/.dbq.yaml or ./dbq.yaml)")
 }

internal/clickhouse.go

Lines changed: 0 additions & 2 deletions
@@ -103,8 +103,6 @@ func (c *ClickhouseDbqConnector) ProfileDataset(dataset string) (*TableMetrics,
 
 	log.Printf("Calculating metrics for table: %s", dataset)
 
-	// ProfileDataSet todo: optimize/batch queries where possible
-
 	// Total Row Count
 	log.Printf("Fetching total row count...")
 	err := c.cnn.QueryRow(ctx, fmt.Sprintf("SELECT count() FROM %s", dataset)).Scan(&metrics.TotalRows)
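
The removed todo mentioned batching queries where possible; for reference, several profiling metrics can be fetched in a single round trip. A sketch using the clickhouse-go v2 driver with an illustrative table and column; connection details are placeholders and none of this is code from the repository:

package main

import (
	"context"
	"log"

	"github.com/ClickHouse/clickhouse-go/v2"
)

func main() {
	// Two metrics (row count and null count) in one query instead of one query per metric.
	conn, err := clickhouse.Open(&clickhouse.Options{Addr: []string{"localhost:9000"}})
	if err != nil {
		log.Fatal(err)
	}

	var totalRows, nullEmails uint64
	query := "SELECT count(), countIf(isNull(email)) FROM users"
	if err := conn.QueryRow(context.Background(), query).Scan(&totalRows, &nullEmails); err != nil {
		log.Fatal(err)
	}
	log.Printf("total rows: %d, null emails: %d", totalRows, nullEmails)
}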

readme.md

Lines changed: 11 additions & 11 deletions
@@ -12,9 +12,9 @@ It is designed to be easy to use and integrate into your existing workflow.
 
 ## 0.1
 - [x] basic structure
-- [ ] define checks cfg v1
-- [ ] checks cfg parser v1
-- [ ] complete clickhouse support
+- [x] define checks cfg v1
+- [x] checks cfg parser v1
+- [x] complete clickhouse support
 - [x] ping
 - [x] import datasets
 - [x] profile dataset
@@ -23,25 +23,25 @@ It is designed to be easy to use and integrate into your existing workflow.
 - [x] count of nulls and blanks
 - [x] most frequent value in column
 - [x] JSON export
-- [ ] run checks
-- [ ] implement support for custom sql check
-- [ ] implement aliases for common checks based on raw sql check
-- [ ] basic cross validation (dataset is defined)
+- [x] run checks
+- [x] implement support for custom sql check
+- [x] implement aliases for common checks based on raw sql check
 - [x] fix cmd descriptions
-- [ ] review todos
+- [x] review todos
+- [ ] basic cross validation (dataset is defined)
 - [ ] review logs
 - [ ] review crashes (wrong arguments)
 - [ ] default values (e.g. severity)
 - [ ] improve output
 - [ ] quiet/verbose mode for logs
+- [ ] docs
 
-## 0.2
+## 0.x
 - config validation
 - add postgres support
 - CLI for adding more checks
-- schema changes checks
 - AirFlow integration (operator)
-
+- output format flag
 
 ---
