Skip to content

Commit d173535

Browse files
committed
fix(test): improve table freshness test to avoid overflow
1 parent 950b9d2 commit d173535

5 files changed

Lines changed: 45 additions & 176 deletions

File tree

testgen/template/flavors/bigquery/gen_query_tests/gen_table_changed_test.sql

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,14 @@ newtests AS (
124124
WHEN general_type = 'A' THEN
125125
'CAST(MIN(@@@) AS STRING) || "|" || CAST(MAX(@@@) AS STRING) || "|" || CAST(COUNT(DISTINCT @@@) AS STRING) || "|" || CAST(SUM(LENGTH(@@@)) AS STRING)'
126126
WHEN general_type = 'N' THEN
127-
'CAST(MIN(@@@) AS STRING) || "|" || CAST(MAX(@@@) AS STRING) || "|" || CAST(SUM(@@@) AS STRING) || "|" || CAST(ROUND(AVG(@@@), 5) AS STRING) || "|" || CAST(ROUND(STDDEV(CAST(@@@ AS FLOAT64)), 5) AS STRING)'
127+
'ARRAY_TO_STRING([
128+
CAST(COUNT(@@@) AS STRING),
129+
CAST(COUNT(DISTINCT MOD(CAST(COALESCE(@@@,0) AS NUMERIC) * 1000000, CAST(1000003 AS NUMERIC))) AS STRING),
130+
COALESCE(CAST(ROUND(MIN(CAST(@@@ AS NUMERIC)), 6) AS STRING), ''''),
131+
COALESCE(CAST(ROUND(MAX(CAST(@@@ AS NUMERIC)), 6) AS STRING), ''''),
132+
CAST(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) AS NUMERIC) * 1000000, CAST(1000000007 AS NUMERIC))), CAST(0 AS NUMERIC)), CAST(1000000007 AS NUMERIC)) AS STRING),
133+
CAST(MOD(COALESCE(SUM(MOD(CAST(ABS(COALESCE(@@@,0)) AS NUMERIC) * 1000000, CAST(1000000009 AS NUMERIC))), CAST(0 AS NUMERIC)), CAST(1000000009 AS NUMERIC)) AS STRING)
134+
], ''|'', '''')'
128135
END,
129136
'@@@', '`' || column_name || '`'),
130137
' || "|" || '

testgen/template/flavors/databricks/gen_query_tests/gen_table_changed_test.sql

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,16 @@ newtests
121121
CASE
122122
WHEN general_type = 'D' THEN 'MIN(@@@)::STRING || ''|'' || MAX(@@@::STRING) || ''|'' || COUNT(DISTINCT @@@)::STRING'
123123
WHEN general_type = 'A' THEN 'MIN(@@@)::STRING || ''|'' || MAX(@@@::STRING) || ''|'' || COUNT(DISTINCT @@@)::STRING || ''|'' || SUM(LENGTH(@@@))::STRING'
124-
WHEN general_type = 'N' THEN 'MIN(@@@)::STRING || ''|'' || MAX(@@@::STRING) || ''|'' || SUM(@@@)::STRING || ''|'' || ROUND(AVG(@@@), 5)::STRING || ''|'' || ROUND(STDDEV(@@@::FLOAT), 5)::STRING'
124+
WHEN general_type = 'N' THEN 'CONCAT_WS(''|'',
125+
COUNT(@@@)::STRING,
126+
COUNT(DISTINCT MOD((COALESCE(@@@,0)::DECIMAL(38,6) * 1000000)::DECIMAL(38,0), 1000003))::STRING,
127+
COALESCE((MIN(@@@)::DECIMAL(38,6))::STRING, ''''),
128+
COALESCE((MAX(@@@)::DECIMAL(38,6))::STRING, ''''),
129+
COALESCE(MOD(COALESCE(SUM(MOD((ABS(COALESCE(@@@,0))::DECIMAL(38,6) * 1000000)::DECIMAL, 1000000007)), 0), 1000000007)::STRING, ''''),
130+
COALESCE(MOD(COALESCE(SUM(MOD((ABS(COALESCE(@@@,0))::DECIMAL(38,6) * 1000000)::DECIMAL, 1000000009)), 0), 1000000009)::STRING, '''')
131+
)'
125132
END,
126-
'@@@', '"' || column_name || '"'),
133+
'@@@', '`' || column_name || '`'),
127134
' || ''|'' || '
128135
ORDER BY element_type, fingerprint_order, column_name) as fingerprint
129136
FROM combined

testgen/template/flavors/mssql/gen_query_tests/gen_table_changed_test.sql

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ WITH last_run AS (SELECT r.table_groups_id, MAX(run_date) AS last_run_date
1414
AND ts.id = '{TEST_SUITE_ID}'
1515
AND p.run_date::DATE <= '{AS_OF_DATE}'
1616
GROUP BY r.table_groups_id),
17-
curprof AS (SELECT p.profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, column_type,
18-
distinct_value_ct, record_ct, max_value, min_value, avg_value, stdev_value, null_value_ct
17+
curprof AS (SELECT p.profile_run_id, p.schema_name, p.table_name, p.column_name, p.functional_data_type,
18+
p.general_type, p.distinct_value_ct, p.record_ct, p.max_value, p.min_value,
19+
p.avg_value, p.stdev_value, p.null_value_ct
1920
FROM last_run lr
2021
INNER JOIN profile_results p
2122
ON (lr.table_groups_id = p.table_groups_id
@@ -28,7 +29,7 @@ locked AS (SELECT schema_name, table_name
2829
AND lock_refresh = 'Y'),
2930
-- IDs - TOP 2
3031
id_cols
31-
AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, column_type,
32+
AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type,
3233
distinct_value_ct,
3334
ROW_NUMBER() OVER (PARTITION BY schema_name, table_name
3435
ORDER BY
@@ -42,7 +43,7 @@ id_cols
4243
AND functional_data_type ILIKE 'ID%'),
4344
-- Process Date - TOP 1
4445
process_date_cols
45-
AS (SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, column_type,
46+
AS (SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type,
4647
distinct_value_ct,
4748
ROW_NUMBER() OVER (PARTITION BY schema_name, table_name
4849
ORDER BY
@@ -57,7 +58,7 @@ process_date_cols
5758
AND functional_data_type ILIKE 'process%'),
5859
-- Transaction Date - TOP 1
5960
tran_date_cols
60-
AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, column_type,
61+
AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type,
6162
distinct_value_ct,
6263
ROW_NUMBER() OVER (PARTITION BY schema_name, table_name
6364
ORDER BY
@@ -70,9 +71,9 @@ tran_date_cols
7071

7172
-- Numeric Measures
7273
numeric_cols
73-
AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type, column_type,
74+
AS ( SELECT profile_run_id, schema_name, table_name, column_name, functional_data_type, general_type,
7475
/*
75-
-- Subscores
76+
-- Subscores -- save for reference
7677
distinct_value_ct * 1.0 / NULLIF(record_ct, 0) AS cardinality_score,
7778
(max_value - min_value) / NULLIF(ABS(NULLIF(avg_value, 0)), 1) AS range_score,
7879
LEAST(1, LOG(GREATEST(distinct_value_ct, 2))) / LOG(GREATEST(record_ct, 2)) AS nontriviality_score,
@@ -98,19 +99,19 @@ numeric_cols_ranked
9899
FROM numeric_cols
99100
WHERE change_detection_score IS NOT NULL),
100101
combined
101-
AS ( SELECT profile_run_id, schema_name, table_name, column_name, 'ID' AS element_type, general_type, column_type, 10 + rank AS fingerprint_order
102+
AS ( SELECT profile_run_id, schema_name, table_name, column_name, 'ID' AS element_type, general_type, 10 + rank AS fingerprint_order
102103
FROM id_cols
103104
WHERE rank <= 2
104105
UNION ALL
105-
SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, column_type, 20 + rank AS fingerprint_order
106+
SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_P' AS element_type, general_type, 20 + rank AS fingerprint_order
106107
FROM process_date_cols
107108
WHERE rank = 1
108109
UNION ALL
109-
SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, column_type, 30 + rank AS fingerprint_order
110+
SELECT profile_run_id, schema_name, table_name, column_name, 'DATE_T' AS element_type, general_type, 30 + rank AS fingerprint_order
110111
FROM tran_date_cols
111112
WHERE rank = 1
112113
UNION ALL
113-
SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, column_type, 40 + rank AS fingerprint_order
114+
SELECT profile_run_id, schema_name, table_name, column_name, 'MEAS' AS element_type, general_type, 40 + rank AS fingerprint_order
114115
FROM numeric_cols_ranked
115116
WHERE rank = 1 ),
116117
newtests AS (
@@ -121,10 +122,16 @@ newtests AS (
121122
'CAST(COUNT(*) AS varchar) + ''|'' + ' || STRING_AGG(
122123
REPLACE(
123124
CASE
124-
WHEN general_type = 'D' THEN 'CAST(MIN(@@@) AS NVARCHAR) + ''|'' + MAX(CAST(@@@ AS NVARCHAR)) + ''|'' + CAST(COUNT(DISTINCT @@@) AS NVARCHAR)'
125-
WHEN general_type = 'A' THEN 'CAST(MIN(@@@) AS NVARCHAR) + ''|'' + MAX(CAST(@@@ AS NVARCHAR)) + ''|'' + CAST(COUNT(DISTINCT @@@) AS NVARCHAR) + ''|'' + CAST(SUM(LEN(@@@)) AS NVARCHAR)'
126-
WHEN general_type = 'N' AND column_type ILIKE '%int%' THEN 'CAST(MIN(@@@) AS NVARCHAR) + ''|'' + MAX(CAST(@@@ AS NVARCHAR)) + ''|'' + CAST(SUM(CAST(@@@ AS BIGINT)) AS NVARCHAR) + ''|'' + CAST(ROUND(AVG(CAST(@@@ AS DECIMAL(30,5))), 5) AS NVARCHAR) + ''|'' + CAST(ROUND(STDEV(CAST(@@@ AS FLOAT)), 5) AS NVARCHAR)'
127-
WHEN general_type = 'N' AND column_type NOT ILIKE '%int%' THEN 'CAST(MIN(@@@) AS NVARCHAR) + ''|'' + MAX(CAST(@@@ AS NVARCHAR)) + ''|'' + CAST(SUM(@@@) AS NVARCHAR) + ''|'' + CAST(ROUND(AVG(@@@), 5) AS NVARCHAR) + ''|'' + CAST(ROUND(STDEV(CAST(@@@ AS FLOAT)), 5) AS NVARCHAR)'
125+
WHEN general_type = 'D' THEN 'CAST(MIN(@@@) AS NVARCHAR) + ''|'' + CAST(MAX(@@@) AS NVARCHAR) + ''|'' + CAST(COUNT_BIG(DISTINCT @@@) AS NVARCHAR)'
126+
WHEN general_type = 'A' THEN 'CAST(MIN(@@@) AS NVARCHAR) + ''|'' + CAST(MAX(@@@) AS NVARCHAR) + ''|'' + CAST(COUNT_BIG(DISTINCT @@@) AS NVARCHAR) + ''|'' + CAST(SUM(LEN(@@@)) AS NVARCHAR)'
127+
WHEN general_type = 'N' THEN 'CONCAT_WS(''|'',
128+
CAST(COUNT_BIG(@@@) AS VARCHAR(20)),
129+
CAST(COUNT_BIG(DISTINCT CAST(CAST(CAST(COALESCE(@@@,0) AS DECIMAL(38,6)) * 1000000 AS DECIMAL(38,0)) % 1000003 AS INT)) AS VARCHAR(20)),
130+
COALESCE(CAST(CAST(MIN(@@@) AS DECIMAL(38,6)) AS VARCHAR(50)), ''''),
131+
COALESCE(CAST(CAST(MAX(@@@) AS DECIMAL(38,6)) AS VARCHAR(50)), ''''),
132+
CAST((COALESCE(SUM(CAST(CAST(ABS(CAST(COALESCE(@@@,0) AS DECIMAL(38,6))) * 1000000 AS DECIMAL(38,0)) % 1000000007 AS DECIMAL(38,0))), 0) % 1000000007) AS VARCHAR(12)),
133+
CAST((COALESCE(SUM(CAST(CAST(ABS(CAST(COALESCE(@@@,0) AS DECIMAL(38,6))) * 1000000 AS DECIMAL(38,0)) % 1000000009 AS DECIMAL(38,0))), 0) % 1000000009) AS VARCHAR(12))
134+
)'
128135
END,
129136
'@@@', '"' || column_name || '"'
130137
),

testgen/template/flavors/postgresql/gen_query_tests/gen_table_changed_test.sql

Lines changed: 0 additions & 157 deletions
This file was deleted.

testgen/template/gen_query_tests/gen_table_changed_test.sql

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,12 @@ newtests
121121
CASE
122122
WHEN general_type = 'D' THEN 'MIN(@@@)::VARCHAR || ''|'' || MAX(@@@::VARCHAR) || ''|'' || COUNT(DISTINCT @@@)::VARCHAR'
123123
WHEN general_type = 'A' THEN 'MIN(@@@)::VARCHAR || ''|'' || MAX(@@@::VARCHAR) || ''|'' || COUNT(DISTINCT @@@)::VARCHAR || ''|'' || SUM(LENGTH(@@@))::VARCHAR'
124-
WHEN general_type = 'N' THEN 'MIN(@@@)::VARCHAR || ''|'' || MAX(@@@::VARCHAR) || ''|'' || SUM(@@@)::VARCHAR || ''|'' || ROUND(AVG(@@@), 5)::VARCHAR || ''|'' || ROUND(STDDEV(@@@::FLOAT), 5)::VARCHAR'
124+
WHEN general_type = 'N' THEN 'COUNT(@@@)::VARCHAR || ''|'' ||
125+
COUNT(DISTINCT MOD((COALESCE(@@@,0)::DECIMAL(38,6) * 1000000)::DECIMAL(38,0), 1000003))::VARCHAR || ''|'' ||
126+
COALESCE((MIN(@@@)::DECIMAL(38,6))::VARCHAR, '''') || ''|'' ||
127+
COALESCE((MAX(@@@)::DECIMAL(38,6))::VARCHAR, '''') || ''|'' ||
128+
COALESCE(MOD(COALESCE(SUM(MOD((ABS(COALESCE(@@@,0))::DECIMAL(38,6) * 1000000)::DECIMAL, 1000000007)), 0), 1000000007)::VARCHAR, '''') || ''|'' ||
129+
COALESCE(MOD(COALESCE(SUM(MOD((ABS(COALESCE(@@@,0))::DECIMAL(38,6) * 1000000)::DECIMAL, 1000000009)), 0), 1000000009)::VARCHAR, '''')'
125130
END,
126131
'@@@', '"' || column_name || '"'),
127132
' || ''|'' || '

0 commit comments

Comments
 (0)