Skip to content

Commit 61bbf7c

Browse files
committed
fix(table freshness): bugs in calculating historical thresholds
1 parent c2c2577 commit 61bbf7c

2 files changed

Lines changed: 72 additions & 27 deletions

File tree

testgen/commands/run_test_execution.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
8282

8383
sql_generator = TestExecutionSQL(connection, table_group, test_run)
8484

85+
# Update the thresholds before retrieving the test definitions in the next steps
86+
LOG.info("Updating historic test thresholds")
87+
execute_db_queries([sql_generator.update_historic_thresholds()])
88+
8589
LOG.info("Retrieving active test definitions in test suite")
8690
test_defs = fetch_dict_from_db(*sql_generator.get_active_test_definitions())
8791
test_defs = [TestExecutionDef(**item) for item in test_defs]
@@ -100,9 +104,6 @@ def run_test_execution(test_suite_id: str | UUID, username: str | None = None, r
100104
)
101105

102106
if valid_test_defs:
103-
LOG.info("Updating historic test thresholds")
104-
execute_db_queries([sql_generator.update_historic_thresholds()])
105-
106107
column_types = {(col.schema_name, col.table_name, col.column_name): col.column_type for col in data_chars}
107108
for td in valid_test_defs:
108109
td.column_type = column_types.get((td.schema_name, td.table_name, td.column_name))
Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,71 @@
1-
WITH stats AS (
2-
SELECT
3-
d.id AS test_definition_id,
4-
COALESCE(
5-
MIN(r.result_signal) FILTER (WHERE d.history_calculation = 'Value'),
6-
MIN(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Minimum')::VARCHAR,
7-
MAX(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Maximum')::VARCHAR,
8-
SUM(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Sum')::VARCHAR,
9-
AVG(r.result_signal::NUMERIC) FILTER (WHERE d.history_calculation = 'Average')::VARCHAR
10-
) as calc_signal
11-
FROM test_definitions d
12-
INNER JOIN LATERAL (
13-
SELECT result_signal
14-
FROM test_results tr
15-
WHERE tr.test_definition_id = d.id
16-
ORDER BY tr.test_time DESC
17-
LIMIT CASE WHEN d.history_calculation = 'Value' THEN 1 ELSE d.history_lookback END
18-
) AS r ON TRUE
19-
WHERE d.test_suite_id = :TEST_SUITE_ID
20-
AND d.test_active = 'Y'
21-
AND d.history_lookback IS NOT NULL
22-
GROUP BY d.id, d.history_calculation, d.history_lookback
1+
-- Recalculates test_definitions.baseline_value for one test suite from each
-- definition's most recent test results.
--
-- Bind parameter:
--   :TEST_SUITE_ID  suite whose definitions and results are considered.
--
-- Per definition, history_calculation selects how the baseline is derived:
--   'Value'                                   -> result_signal of the single latest result
--   'Minimum' / 'Maximum' / 'Sum' / 'Average' -> aggregate over the numeric signals of the
--                                                latest history_lookback results
-- Definitions with no result inside their lookback window are left untouched.
WITH filtered_defs AS (
    -- Step 1: Filter definitions first to minimize join surface area
    SELECT id,
           test_suite_id,
           schema_name,
           table_name,
           column_name,
           test_type,
           history_calculation,
           -- 'Value' only needs the latest result. history_lookback cannot be NULL
           -- here because of the filter below, so no COALESCE is required.
           CASE
               WHEN history_calculation = 'Value' THEN 1
               ELSE history_lookback
           END AS lookback
    FROM test_definitions
    WHERE test_suite_id = :TEST_SUITE_ID
      AND test_active = 'Y'
      AND history_calculation IS NOT NULL
      AND history_lookback IS NOT NULL
),
normalized_results AS (
    -- Step 2: Normalize definition IDs for autogenerated tests.
    -- Results flagged auto_gen are re-attached to the current definition with the
    -- same suite / schema / table / column / test type; all other results keep
    -- their stored test_definition_id.
    SELECT CASE
               WHEN r.auto_gen THEN d.id
               ELSE r.test_definition_id
           END AS test_definition_id,
           r.test_time,
           r.result_signal
    FROM test_results r
    LEFT JOIN filtered_defs d
        ON r.auto_gen = TRUE
       AND r.test_suite_id = d.test_suite_id
       AND r.schema_name = d.schema_name
       -- table and column may be NULL on both sides, hence the NULL-safe comparison
       AND r.table_name IS NOT DISTINCT FROM d.table_name
       AND r.column_names IS NOT DISTINCT FROM d.column_name
       AND r.test_type = d.test_type
    WHERE r.test_suite_id = :TEST_SUITE_ID
),
ranked_results AS (
    -- Step 3: Use a window function to number each definition's results, newest first
    SELECT n.test_definition_id,
           n.result_signal,
           -- Only cast signals that look like plain decimals (optional leading minus,
           -- optional integer part, optional decimal point, at least one trailing
           -- digit); anything else becomes NULL and is ignored by the aggregates.
           -- NOTE(review): scientific notation (e.g. 1e-5) and a leading '+' are
           -- treated as non-numeric by this pattern.
           CASE
               WHEN n.result_signal ~ '^-?[0-9]*\.?[0-9]+$' THEN n.result_signal::NUMERIC
               ELSE NULL
           END AS signal_numeric,
           -- NULLS LAST keeps results without a test_time from being ranked as the
           -- most recent (DESC sorts NULLs first by default). The result_signal
           -- tie-breaker only makes the outcome deterministic when two results
           -- share the same test_time.
           ROW_NUMBER() OVER (
               PARTITION BY n.test_definition_id
               ORDER BY n.test_time DESC NULLS LAST,
                        n.result_signal DESC NULLS LAST
           ) AS recency_rank
    FROM normalized_results n
    WHERE n.test_definition_id IN (SELECT id FROM filtered_defs)
),
stats AS (
    -- Step 4: Aggregate only the rows within the lookback range
    SELECT d.id AS test_definition_id,
           d.history_calculation,
           -- raw (text) signal of the newest result; used for 'Value'
           MAX(CASE WHEN rr.recency_rank = 1 THEN rr.result_signal END) AS latest_signal,
           MIN(rr.signal_numeric) AS min_signal,
           MAX(rr.signal_numeric) AS max_signal,
           SUM(rr.signal_numeric) AS sum_signal,
           AVG(rr.signal_numeric) AS avg_signal
    FROM filtered_defs d
    INNER JOIN ranked_results rr
        ON d.id = rr.test_definition_id
    WHERE rr.recency_rank <= d.lookback
    GROUP BY d.id,
             d.history_calculation
)
UPDATE test_definitions t
SET baseline_value = CASE s.history_calculation
                         WHEN 'Value' THEN s.latest_signal
                         WHEN 'Minimum' THEN s.min_signal::VARCHAR
                         WHEN 'Maximum' THEN s.max_signal::VARCHAR
                         WHEN 'Sum' THEN s.sum_signal::VARCHAR
                         WHEN 'Average' THEN s.avg_signal::VARCHAR
                         ELSE NULL
                     END
FROM stats s
WHERE t.id = s.test_definition_id;

0 commit comments

Comments
 (0)