You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: testgen/template/dbsetup/050_populate_new_schema_metadata.sql
+10-10Lines changed: 10 additions & 10 deletions
Original file line number
Diff line number
Diff line change
@@ -1240,7 +1240,7 @@ WHERE {SUBSET_CONDITION}
1240
1240
-- SQL Server lookup: T-SQL has no LIMIT clause, so the 500-row cap is expressed with TOP.
('1271', '1100', 'Profile Anomaly', 'Potential_PII', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'),
1241
1241
('1272', '1100', 'Profile Anomaly', 'Potential_PII', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;'),
1242
1242
1243
-
('1273', '1001', 'Profile Anomaly' , 'Suggested_Type', 'databricks', NULL, 'SELECT TOP 20 `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC;'),
1243
+
('1273', '1001', 'Profile Anomaly' , 'Suggested_Type', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'),
1244
1244
('1274', '1002', 'Profile Anomaly' , 'Non_Standard_Blanks', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CASE WHEN `{COLUMN_NAME}` IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''-{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''0{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''9{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''x{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''z{2,}'' THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN `{COLUMN_NAME}` = '''' THEN 1 WHEN `{COLUMN_NAME}` IS NULL THEN 1 ELSE 0 END = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;'),
1245
1245
('1275', '1003', 'Profile Anomaly' , 'Invalid_Zip_USA', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT 500;'),
1246
1246
('1276', '1004', 'Profile Anomaly' , 'Multiple_Types_Minor', 'databricks', NULL, 'SELECT DISTINCT column_name, columns.table_name, CASE WHEN data_type ILIKE ''timestamp%'' THEN lower(data_type) WHEN data_type ILIKE ''date'' THEN lower(data_type) WHEN data_type ILIKE ''boolean'' THEN ''boolean'' WHEN data_type = ''TEXT'' THEN ''varchar('' || CAST(character_maximum_length AS STRING) || '')'' WHEN data_type ILIKE ''char%'' THEN ''char('' || CAST(character_maximum_length AS STRING) || '')'' WHEN data_type = ''NUMBER'' AND numeric_precision = 38 AND numeric_scale = 0 THEN ''bigint'' WHEN data_type ILIKE ''num%'' THEN ''numeric('' || CAST(numeric_precision AS STRING) || '','' || CAST(numeric_scale AS STRING) || '')'' ELSE data_type END AS data_type FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND tables.table_type = ''BASE TABLE'' ORDER BY data_type, table_name;'),
@@ -1250,21 +1250,21 @@ WHERE {SUBSET_CONDITION}
1250
1250
('1280', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'databricks', NULL, 'SELECT DISTINCT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY table_name; ' ),
1251
1251
('1281', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN `{COLUMN_NAME}` BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ),
1252
1252
('1282', '1010', 'Profile Anomaly' , 'Quoted_Values', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN `{COLUMN_NAME}` ILIKE ''"%"'' OR `{COLUMN_NAME}` ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ),
1253
-
('1283', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ),
1254
-
('1284', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Date'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_DATE;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_DATE;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ),
1253
+
-- Shows the 10 most frequent numeric and the 10 most frequent non-numeric values.
-- Each UNION ALL branch carries its own LIMIT; a single outer LIMIT would be applied after
-- ORDER BY data_type and could drop one of the two groups entirely.
('1283', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT ''Non-Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS B ORDER BY data_type, count DESC;' ),
1254
+
-- Shows the 10 most frequent date-like and the 10 most frequent non-date values.
-- Each UNION ALL branch carries its own LIMIT; a single outer LIMIT would be applied after
-- ORDER BY data_type and could drop one of the two groups entirely.
('1284', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Date'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_DATE;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT ''Non-Date'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_DATE;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS B ORDER BY data_type, count DESC;' ),
1255
1255
('1285', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN `{COLUMN_NAME}` IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''-{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''0{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''9{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''x{2,}'' OR LOWER(`{COLUMN_NAME}`::STRING) REGEXP ''z{2,}'' THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER(`{COLUMN_NAME}`) IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN `{COLUMN_NAME}` = '''' THEN 1 WHEN `{COLUMN_NAME}` IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}`;' ),
1256
-
('1286', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY COUNT(*) DESC;' ),
1257
-
('1287', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY COUNT(*) DESC;' ),
1258
-
('1288', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` HAVING COUNT(*)> 1 ORDER BY COUNT(*) DESC LIMIT 500;' ),
1256
+
('1286', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY count DESC;' ),
1257
+
('1287', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY count DESC;' ),
1258
+
('1288', '1016', 'Profile Anomaly' , 'Potential_Duplicates', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` HAVING count > 1 ORDER BY count DESC LIMIT 500;' ),
1259
1259
('1289', '1017', 'Profile Anomaly' , 'Standardized_Value_Matches', 'databricks', NULL, 'WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, '''''',.-'', '''')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, '''''',.-'', '''')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT DISTINCT a.`{COLUMN_NAME}`, possible_standard_value, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, '''''',.-'', '''')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}`, possible_standard_value ORDER BY possible_standard_value ASC, count DESC LIMIT 500;' ),
1260
1260
('1290', '1018', 'Profile Anomaly' , 'Unlikely_Date_Values', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, ''{PROFILE_RUN_DATE}'' :: DATE AS profile_run_date, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} a WHERE (`{COLUMN_NAME}` < ''1900-01-01''::DATE) OR (`{COLUMN_NAME}` > ''{PROFILE_RUN_DATE}'' :: DATE + INTERVAL ''30 year'' ) GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;' ),
('1293', '1021', 'Profile Anomaly' , 'Unexpected US States', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;' ),
1264
1264
('1294', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` DESC LIMIT 500;' ),
1265
-
('1295', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10''Non-Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC) AS B ORDER BY data_type, count DESC LIMIT 10;' ),
1265
+
-- Shows the 10 most frequent numeric and the 10 most frequent non-numeric values.
-- Each UNION ALL branch carries its own LIMIT; a single outer LIMIT would be applied after
-- ORDER BY data_type and could drop one of the two groups entirely.
('1295', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'databricks', NULL, 'SELECT A.* FROM (SELECT DISTINCT ''Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> = 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT ''Non-Numeric'' as data_type, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE <%IS_NUM;`{COLUMN_NAME}`%> != 1 GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 10) AS B ORDER BY data_type, count DESC;' ),
1266
1266
('1296', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') <> ''999'' GROUP BY `{COLUMN_NAME}` ORDER BY count DESC, `{COLUMN_NAME}` LIMIT 500;'),
1267
-
('1297', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'') AND NOT REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''.*\\s(and|but|or|yet)\\s.*'') GROUP BY `{COLUMN_NAME}` ORDER BY COUNT(*) DESC LIMIT 500;' ),
1267
+
('1297', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'') AND NOT REGEXP_LIKE(`{COLUMN_NAME}`::STRING, ''.*\\s(and|but|or|yet)\\s.*'') GROUP BY `{COLUMN_NAME}` ORDER BY count DESC LIMIT 500;' ),
1268
1268
1269
1269
('1298', '1004', 'Test Results', 'Alpha_Trunc', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}` , LEN(`{COLUMN_NAME}`) as current_max_length, {THRESHOLD_VALUE} as previous_max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}, (SELECT MAX(LEN(`{COLUMN_NAME}`)) as max_length FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE LEN(`{COLUMN_NAME}`) = a.max_length AND a.max_length < {THRESHOLD_VALUE} LIMIT 500;'),
1270
1270
('1299', '1005', 'Test Results', 'Avg_Shift', 'databricks', NULL, 'SELECT AVG(`{COLUMN_NAME}` :: FLOAT) AS current_average FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
@@ -1278,7 +1278,7 @@ WHERE {SUBSET_CONDITION}
1278
1278
('1307', '1015', 'Test Results', 'Future_Date', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN(`{COLUMN_NAME}`::DATE - ''{TEST_DATE}''::DATE)) > {THRESHOLD_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'),
1279
1279
('1308', '1016', 'Test Results', 'Future_Date_1Y', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE GREATEST(0, SIGN(`{COLUMN_NAME}`::DATE - (''{TEST_DATE}''::DATE + 365))) > {THRESHOLD_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'),
1280
1280
('1309', '1017', 'Test Results', 'Incr_Avg_Shift', 'databricks', NULL, 'SELECT AVG(`{COLUMN_NAME}` :: FLOAT) AS current_average, SUM(`{COLUMN_NAME}` ::FLOAT) AS current_sum, NULLIF(COUNT(`{COLUMN_NAME}` )::FLOAT, 0) as current_value_count FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
1281
-
('1310', '1018', 'Test Results', 'LOV_All', 'databricks', NULL, 'SELECT LISTAGG(DISTINCT `{COLUMN_NAME}`, ''|'') WITHIN GROUP (ORDER BY `{COLUMN_NAME}`) FROM {TARGET_SCHEMA}.{TABLE_NAME} HAVING LISTAGG(DISTINCT `{COLUMN_NAME}`, ''|'') WITHIN GROUP (ORDER BY `{COLUMN_NAME}`) <> ''{THRESHOLD_VALUE}'' LIMIT 500;'),
1281
+
('1310', '1018', 'Test Results', 'LOV_All', 'databricks', NULL, 'SELECT ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), ''|'') AS aggregated_values FROM {TARGET_SCHEMA}.{TABLE_NAME} HAVING ARRAY_JOIN(ARRAY_SORT(COLLECT_SET(`{COLUMN_NAME}`)), ''|'') <> ''{THRESHOLD_VALUE}'' LIMIT 500;'),
1282
1282
('1311', '1019', 'Test Results', 'LOV_Match', 'databricks', NULL, 'SELECT DISTINCT NULLIF(`{COLUMN_NAME}`, '''') AS `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF(`{COLUMN_NAME}`, '''') NOT IN {BASELINE_VALUE} GROUP BY `{COLUMN_NAME}` LIMIT 500;'),
1283
1283
('1312', '1020', 'Test Results', 'Min_Date', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` :: DATE < ''{BASELINE_VALUE}'' :: DATE GROUP BY `{COLUMN_NAME}` LIMIT 500;'),
1284
1284
('1313', '1021', 'Test Results', 'Min_Val', 'databricks', NULL, 'SELECT DISTINCT `{COLUMN_NAME}`, (ABS(`{COLUMN_NAME}`) - ABS({BASELINE_VALUE})) AS difference_from_baseline FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE `{COLUMN_NAME}` < {BASELINE_VALUE} LIMIT 500;'),
@@ -1299,7 +1299,7 @@ WHERE {SUBSET_CONDITION}
1299
1299
('1328', '1040', 'Test Results', 'Variability_Increase', 'databricks', NULL, 'SELECT STDDEV(CAST(`{COLUMN_NAME}` AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
1300
1300
('1329', '1041', 'Test Results', 'Variability_Decrease', 'databricks', NULL, 'SELECT STDDEV(CAST(`{COLUMN_NAME}` AS FLOAT)) as current_standard_deviation FROM {TARGET_SCHEMA}.{TABLE_NAME};'),
1301
1301
1302
-
('1230', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE lower(`{COLUMN_NAME}`) IN (SELECT trim(value) FROM TABLE (FLATTEN(INPUT => SPLIT(SUBSTRING(''{DETAIL_EXPRESSION}'', POSITION('':'', ''{DETAIL_EXPRESSION}'') + 2), ''|''))) ) GROUP BY `{COLUMN_NAME}`;'),
1302
+
('1230', '1027', 'Profile Anomaly' , 'Variant_Coded_Values', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE LOWER(`{COLUMN_NAME}`) IN (SELECT TRIM(value) FROM (SELECT EXPLODE(SPLIT(SUBSTRING(''{DETAIL_EXPRESSION}'', INSTR(''{DETAIL_EXPRESSION}'', '':'') + 2), ''\\|'')) AS value)) GROUP BY `{COLUMN_NAME}`;'),
1303
1303
-- Lists values containing control characters, a leading space, or wrapping quotes.
-- Java-style regex has no POSIX bracket classes: [[:cntrl:]] would match the literal
-- characters ":cntrl", so the control-character class is written as \p{Cntrl}
-- (backslash doubled, matching the escaping used by the other Databricks regex rows).
('1330', '1043', 'Test Results', 'Valid_Characters', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE REGEXP_LIKE(`{COLUMN_NAME}`, ''.*\\p{Cntrl}.*'') OR `{COLUMN_NAME}`::STRING LIKE '' %'' OR `{COLUMN_NAME}`::STRING LIKE ''''''%'''''' OR `{COLUMN_NAME}`::STRING LIKE ''"%"'' GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'),
1304
1304
('1331', '1044', 'Test Results', 'Valid_US_Zip', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') NOT IN (''99999'', ''999999999'', ''99999-9999'') GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'),
1305
1305
-- Lists values that are not exactly three digits. TRANSLATE maps digits 0-8 to 9, so a
-- valid 3-digit zip prefix becomes ''999''; comparing against the 5/9-digit zip patterns
-- would flag every valid Zip3 value.
('1332', '1045', 'Test Results', 'Valid_US_Zip3', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) AS record_ct FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE(`{COLUMN_NAME}`,''012345678'',''999999999'') <> ''999'' GROUP BY `{COLUMN_NAME}` ORDER BY record_ct DESC LIMIT 20;'),
0 commit comments