88 "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
99 "log"
1010 "log/slog"
11+ "reflect"
1112 "regexp"
1213 "strings"
1314 "time"
@@ -84,7 +85,7 @@ func (c *ClickhouseDbqConnector) ImportDatasets(filter string) ([]string, error)
8485 return datasets , nil
8586}
8687
87- func (c * ClickhouseDbqConnector ) ProfileDataset (dataset string ) (* TableMetrics , error ) {
88+ func (c * ClickhouseDbqConnector ) ProfileDataset (dataset string , sample bool ) (* TableMetrics , error ) {
8889 startTime := time .Now ()
8990 ctx := context .Background ()
9091
@@ -112,6 +113,46 @@ func (c *ClickhouseDbqConnector) ProfileDataset(dataset string) (*TableMetrics,
112113 }
113114 slog .Debug ("Total rows: %d" , metrics .TotalRows )
114115
116+ // sample data if enabled
117+ if sample {
118+ sampleQuery := fmt .Sprintf ("select * from %s.%s order by rand() limit 100" , databaseName , tableName )
119+
120+ toCtx , cancel := context .WithTimeout (context .Background (), 30 * time .Second )
121+ defer cancel ()
122+
123+ rows , err := c .cnn .Query (toCtx , sampleQuery )
124+ if err != nil {
125+ log .Printf ("Warning: Failed to sample data %s: %v" , err )
126+ }
127+ defer rows .Close ()
128+
129+ var allRows []map [string ]interface {}
130+ for rows .Next () {
131+ scanArgs := make ([]interface {}, len (rows .Columns ()))
132+ for i , colType := range rows .ColumnTypes () {
133+ scanType := colType .ScanType ()
134+ valuePtr := reflect .New (scanType ).Interface ()
135+ scanArgs [i ] = valuePtr
136+ }
137+
138+ err = rows .Scan (scanArgs ... )
139+ if err != nil {
140+ log .Printf ("Warning: Failed to scan row: %v" , err )
141+ continue
142+ }
143+
144+ rowData := make (map [string ]interface {})
145+ for i , colName := range rows .Columns () {
146+ scannedValue := reflect .ValueOf (scanArgs [i ]).Elem ().Interface ()
147+ rowData [colName ] = scannedValue
148+ }
149+
150+ allRows = append (allRows , rowData )
151+ }
152+
153+ metrics .RowsSample = allRows
154+ }
155+
115156 // Get Column Information (Name and Type)
116157 columnsToProcess , err := fetchColumns (c .cnn , ctx , databaseName , tableName )
117158 if err != nil {
@@ -131,9 +172,10 @@ func (c *ClickhouseDbqConnector) ProfileDataset(dataset string) (*TableMetrics,
131172 colStartTime := time .Now ()
132173 log .Printf ("Processing column: %s (Type: %s)" , col .Name , col .Type )
133174 colMetrics := & ColumnMetrics {
134- ColumnName : col .Name ,
135- DataType : col .Type ,
136- ColumnComment : col .Comment ,
175+ ColumnName : col .Name ,
176+ DataType : col .Type ,
177+ ColumnComment : col .Comment ,
178+ ColumnPosition : col .Position ,
137179 }
138180
139181 // Null Count (all types)
@@ -259,7 +301,7 @@ func (c *ClickhouseDbqConnector) RunCheck(check *Check, dataset string, defaultW
259301
260302func fetchColumns (cnn driver.Conn , ctx context.Context , databaseName string , tableName string ) ([]ColumnInfo , error ) {
261303 columnQuery := `
262- SELECT name, type, comment
304+ SELECT name, type, comment, position
263305 FROM system.columns
264306 WHERE database = ? AND table = ?
265307 ORDER BY position`
@@ -273,10 +315,11 @@ func fetchColumns(cnn driver.Conn, ctx context.Context, databaseName string, tab
273315 var cols []ColumnInfo
274316 for rows .Next () {
275317 var colName , colType , comment string
276- if err := rows .Scan (& colName , & colType , & comment ); err != nil {
318+ var pos uint64
319+ if err := rows .Scan (& colName , & colType , & comment , & pos ); err != nil {
277320 return nil , fmt .Errorf ("failed to scan column info: %w" , err )
278321 }
279- cols = append (cols , ColumnInfo {Name : colName , Type : colType , Comment : comment })
322+ cols = append (cols , ColumnInfo {Name : colName , Type : colType , Comment : comment , Position : uint ( pos ) })
280323 }
281324
282325 if err = rows .Err (); err != nil {
0 commit comments