77 "github.com/ClickHouse/clickhouse-go/v2"
88 "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
99 "log"
10+ "regexp"
1011 "strings"
1112 "time"
1213)
@@ -243,6 +244,116 @@ func (c *ClickhouseDbqConnector) ProfileDataSet(dataSet string) (*TableMetrics,
243244 return metrics , nil
244245}
245246
247+ func (c * ClickhouseDbqConnector ) RunCheck (check * Check , dataSet string , defaultWhere string ) (string , error ) {
248+ if c .cnn == nil {
249+ return "" , fmt .Errorf ("database connection is not initialized" )
250+ }
251+
252+ query , err := generateDataCheckQuery (check , dataSet , defaultWhere )
253+ if err != nil {
254+ return "" , fmt .Errorf ("failed to generate SQL for check %s (%s): %s" , check .ID , dataSet , err .Error ())
255+ }
256+
257+ log .Printf ("Executing SQL for (%s): %s" , check .ID , query )
258+
259+ startTime := time .Now ()
260+ rows , err := c .cnn .Query (context .Background (), query )
261+ if err != nil {
262+ return "" , fmt .Errorf ("failed to query database: %w" , err )
263+ }
264+ defer rows .Close ()
265+ elapsed := time .Since (startTime ).Milliseconds ()
266+
267+ for rows .Next () {
268+ var checkPassed bool
269+ if err := rows .Scan (& checkPassed ); err != nil {
270+ return "" , fmt .Errorf ("failed to scan row: %w" , err )
271+ }
272+
273+ log .Printf ("Result is: %t (%d ms)" , checkPassed , elapsed )
274+ log .Printf ("---" )
275+ }
276+
277+ if err = rows .Err (); err != nil {
278+ return "" , fmt .Errorf ("error occurred during row iteration: %w" , err )
279+ }
280+
281+ return "" , nil
282+ }
283+
284+ func generateDataCheckQuery (check * Check , dataSet string , whereClause string ) (string , error ) {
285+ var sqlQuery string
286+
287+ // handle raw_query first
288+ if check .ID == CheckTypeRawQuery {
289+ if check .Query == "" {
290+ return "" , fmt .Errorf ("check with id 'raw_query' requires a 'query' field" )
291+ }
292+ sqlQuery = strings .ReplaceAll (check .Query , "{{table}}" , dataSet )
293+
294+ if whereClause != "" {
295+ // todo: more sophisticated check might be needed
296+ if strings .Contains (strings .ToLower (sqlQuery ), " where " ) {
297+ sqlQuery = fmt .Sprintf ("%s and (%s)" , sqlQuery , whereClause )
298+ } else {
299+ sqlQuery = fmt .Sprintf ("%s where %s" , sqlQuery , whereClause )
300+ }
301+ }
302+
303+ return sqlQuery , nil
304+ }
305+
306+ isAggFunction := startWithAnyOf ([]string {
307+ "min" , "max" , "avg" , "stddevPop" , "sum" ,
308+ }, check .ID )
309+
310+ var checkExpression string
311+ switch {
312+ case strings .HasPrefix (check .ID , "row_count" ):
313+ // format "row_count <operator> <value>"
314+ parts := strings .Fields (check .ID )
315+ if len (parts ) != 3 {
316+ return "" , fmt .Errorf ("invalid format for row_count check: %s" , check .ID )
317+ }
318+ checkExpression = fmt .Sprintf ("count() %s %s" , parts [1 ], parts [2 ])
319+
320+ case strings .HasPrefix (check .ID , "null_count" ):
321+ // format "null_count(<column_name>) <operator> <value>"
322+ re := regexp .MustCompile (`null_count\((.*?)\)\s*(==|!=|>|<|>=|<=)\s*(\d+)` )
323+ matches := re .FindStringSubmatch (check .ID )
324+ if len (matches ) != 4 {
325+ return "" , fmt .Errorf ("invalid format for null_count check: %s" , check .ID )
326+ }
327+
328+ column := matches [1 ]
329+ operator := matches [2 ]
330+ value := matches [3 ]
331+ checkExpression = fmt .Sprintf ("countIf(%s IS NULL) %s %s" , column , operator , value )
332+
333+ case isAggFunction :
334+ // format: <func>(<column_name>) <operator> <value>
335+ re := regexp .MustCompile (`^(min|max|avg|stddevPop|sum)\(([^)]+)\)\s+(==|>=|<=|>|<)\s+(.*)$` )
336+ matches := re .FindStringSubmatch (check .ID )
337+ if len (matches ) < 4 {
338+ return "" , fmt .Errorf ("invalid format for aggregation function check: %s" , check .ID )
339+ }
340+ checkExpression = fmt .Sprintf ("%s" , matches [0 ])
341+
342+ default :
343+ // Assume the ID itself is a valid boolean expression if no specific pattern matches
344+ // This is less robust but covers simple cases.
345+ log .Printf ("Warning: Check ID '%s' did not match known patterns. Assuming it's a direct SQL boolean expression." , check .ID )
346+ checkExpression = check .ID
347+ }
348+
349+ sqlQuery = fmt .Sprintf ("select %s from %s" , checkExpression , dataSet )
350+ if whereClause != "" {
351+ sqlQuery = fmt .Sprintf ("%s where %s" , sqlQuery , whereClause )
352+ }
353+
354+ return sqlQuery , nil
355+ }
356+
246357// isNumericCHType checks if a ClickHouse data type string represents a numeric type
247358// that supports standard aggregate functions like min, max, avg, stddev
248359func isNumericCHType (dataType string ) bool {
@@ -260,3 +371,12 @@ func isStringCHType(dataType string) bool {
260371 return strings .HasPrefix (dataType , "string" ) ||
261372 strings .HasPrefix (dataType , "fixedstring" )
262373}
374+
// startWithAnyOf reports whether s begins with at least one of the given
// prefixes. It returns false when prefixes is empty.
func startWithAnyOf(prefixes []string, s string) bool {
	matched := false
	// Stop scanning as soon as one prefix matches.
	for i := 0; i < len(prefixes) && !matched; i++ {
		matched = strings.HasPrefix(s, prefixes[i])
	}
	return matched
}
0 commit comments