Skip to content

Commit 5e9f402

Browse files
author
ci bot
committed
Merge branch 'fix/standardized-value-source-data-order-by' into 'enterprise'
fix: source data ORDER BY for Standardized_Value_Matches hygiene issue. See merge request dkinternal/testgen/dataops-testgen!452.
2 parents 4247394 + f07c561 commit 5e9f402

1 file changed

Lines changed: 10 additions & 10 deletions

File tree

testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Standardized_Value_Matches.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ profile_anomaly_types:
3131
GROUP BY possible_standard_value
3232
HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1
3333
)
34-
SELECT DISTINCT a.`{COLUMN_NAME}`, COUNT(*) AS count
34+
SELECT a.`{COLUMN_NAME}`, COUNT(*) AS count
3535
FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a
3636
JOIN cte b
3737
ON UPPER(REGEXP_REPLACE(CAST(a.`{COLUMN_NAME}` AS STRING), r"[ '\.\-\,]", '')) = b.possible_standard_value
3838
GROUP BY a.`{COLUMN_NAME}`
39-
ORDER BY possible_standard_value ASC, count DESC
39+
ORDER BY UPPER(REGEXP_REPLACE(CAST(a.`{COLUMN_NAME}` AS STRING), r"[ '\.\-\,]", '')) ASC, count DESC
4040
LIMIT {LIMIT};
4141
error_type: Profile Anomaly
4242
- id: '1289'
@@ -45,61 +45,61 @@ profile_anomaly_types:
4545
sql_flavor: databricks
4646
lookup_type: null
4747
lookup_query: |-
48-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT DISTINCT a.`{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}` ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
48+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT a.`{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}` ORDER BY UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
4949
error_type: Profile Anomaly
5050
- id: '1131'
5151
test_id: '1017'
5252
test_type: Standardized_Value_Matches
5353
sql_flavor: mssql
5454
lookup_type: null
5555
lookup_query: |-
56-
WITH CTE AS ( SELECT DISTINCT TOP {LIMIT} UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC;
56+
WITH CTE AS ( SELECT DISTINCT TOP {LIMIT} UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(REPLACE(TRANSLATE(a."{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) ASC, count DESC;
5757
error_type: Profile Anomaly
5858
- id: '1074'
5959
test_id: '1017'
6060
test_type: Standardized_Value_Matches
6161
sql_flavor: postgresql
6262
lookup_type: null
6363
lookup_query: |-
64-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
64+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
6565
error_type: Profile Anomaly
6666
- id: '1049'
6767
test_id: '1017'
6868
test_type: Standardized_Value_Matches
6969
sql_flavor: redshift
7070
lookup_type: null
7171
lookup_query: |-
72-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
72+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
7373
error_type: Profile Anomaly
7474
- id: '1449'
7575
test_id: '1017'
7676
test_type: Standardized_Value_Matches
7777
sql_flavor: redshift_spectrum
7878
lookup_type: null
7979
lookup_query: |-
80-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
80+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
8181
error_type: Profile Anomaly
8282
- id: '1188'
8383
test_id: '1017'
8484
test_type: Standardized_Value_Matches
8585
sql_flavor: snowflake
8686
lookup_type: null
8787
lookup_query: |-
88-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
88+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
8989
error_type: Profile Anomaly
9090
- id: '1516'
9191
test_id: '1017'
9292
test_type: Standardized_Value_Matches
9393
sql_flavor: oracle
9494
lookup_type: null
9595
lookup_query: |-
96-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC FETCH FIRST {LIMIT} ROWS ONLY
96+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) ASC, count DESC FETCH FIRST {LIMIT} ROWS ONLY
9797
error_type: Profile Anomaly
9898
- id: '1516'
9999
test_id: '1017'
100100
test_type: Standardized_Value_Matches
101101
sql_flavor: sap_hana
102102
lookup_type: null
103103
lookup_query: |-
104-
WITH CTE AS ( SELECT DISTINCT UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT}
104+
WITH CTE AS ( SELECT DISTINCT UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) ASC, count DESC LIMIT {LIMIT}
105105
error_type: Profile Anomaly

0 commit comments

Comments
 (0)