Skip to content

Commit 44d4a4f

Browse files
Chessing234 and claude committed
Fix DATETIME_DIFF divergence between PostgreSQL and BigQuery (#1549)
BigQuery's DATETIME_DIFF returns a truncated integer, while the PostgreSQL implementation returned a fractional value. This caused subtle differences in query results, particularly in urine_output_rate.sql, where HOUR-based comparisons (<= 5, <= 11) included different time ranges depending on the backend.

Changes:
- Update postgres-functions.sql: wrap the DATETIME_DIFF result with TRUNC to match BigQuery's truncation behavior
- Update urine_output_rate.sql (BigQuery): use SECOND-based diffs divided by 3600.0 for fractional hour comparisons, matching the approach used in kdigo_uo.sql
- Update urine_output_rate.sql (PostgreSQL): apply the same SECOND-based approach for consistency

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5706978 commit 44d4a4f

3 files changed

Lines changed: 29 additions & 21 deletions

File tree

mimic-iv/concepts/measurement/urine_output_rate.sql

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,10 @@ WITH tm AS (
1818
-- now calculate time since last UO measurement
1919
, uo_tm AS (
2020
SELECT tm.stay_id
21-
, CASE
22-
WHEN LAG(charttime) OVER w IS NULL
23-
THEN DATETIME_DIFF(charttime, intime_hr, MINUTE)
24-
ELSE DATETIME_DIFF(charttime, LAG(charttime) OVER w, MINUTE)
25-
END AS tm_since_last_uo
21+
, COALESCE(
22+
DATETIME_DIFF(charttime, LAG(charttime) OVER w, SECOND) / 60.0
23+
, DATETIME_DIFF(charttime, intime_hr, SECOND) / 60.0
24+
) AS tm_since_last_uo
2625
, uo.charttime
2726
, uo.urineoutput
2827
FROM tm
@@ -45,16 +44,19 @@ WITH tm AS (
4544
-- to 1 hour of UO, therefore we use '5' and '11' to restrict the
4645
-- period, rather than 6/12; this assumption may overestimate UO rate
4746
-- when documentation is done less than hourly
48-
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, HOUR) <= 5
47+
-- Use SECOND-based diff divided by 3600 for fractional hours,
48+
-- ensuring consistent behavior between BigQuery and PostgreSQL
49+
-- (see issue #1549). We compare <= 5 and <= 11 hours respectively.
50+
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, SECOND) / 3600.0 <= 5
4951
THEN iosum.urineoutput
5052
ELSE null END) AS urineoutput_6hr
51-
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, HOUR) <= 5
53+
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, SECOND) / 3600.0 <= 5
5254
THEN iosum.tm_since_last_uo
5355
ELSE null END) / 60.0 AS uo_tm_6hr
54-
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, HOUR) <= 11
56+
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, SECOND) / 3600.0 <= 11
5557
THEN iosum.urineoutput
5658
ELSE null END) AS urineoutput_12hr
57-
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, HOUR) <= 11
59+
, SUM(CASE WHEN DATETIME_DIFF(io.charttime, iosum.charttime, SECOND) / 3600.0 <= 11
5860
THEN iosum.tm_since_last_uo
5961
ELSE null END) / 60.0 AS uo_tm_12hr
6062
-- 24 hours

mimic-iv/concepts_postgres/measurement/urine_output_rate.sql

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,10 @@ WITH tm AS (
1717
), uo_tm AS (
1818
SELECT
1919
tm.stay_id,
20-
CASE
21-
WHEN LAG(charttime) OVER w IS NULL
22-
THEN EXTRACT(EPOCH FROM charttime - intime_hr) / 60.0
23-
ELSE EXTRACT(EPOCH FROM charttime - LAG(charttime) OVER w) / 60.0
24-
END AS tm_since_last_uo,
20+
COALESCE(
21+
DATETIME_DIFF(charttime, LAG(charttime) OVER w, 'SECOND') / 60.0,
22+
DATETIME_DIFF(charttime, intime_hr, 'SECOND') / 60.0
23+
) AS tm_since_last_uo,
2524
uo.charttime,
2625
uo.urineoutput
2726
FROM tm
@@ -33,30 +32,33 @@ WITH tm AS (
3332
io.stay_id,
3433
io.charttime, /* we have joined each row to all rows preceding within 24 hours */ /* we can now sum these rows to get total UO over the last 24 hours */ /* we can use case statements to restrict it to only the last 6/12 hours */ /* therefore we have three sums: */ /* 1) over a 6 hour period */ /* 2) over a 12 hour period */ /* 3) over a 24 hour period */
3534
SUM(DISTINCT io.urineoutput) AS uo, /* note that we assume data charted at charttime corresponds */ /* to 1 hour of UO, therefore we use '5' and '11' to restrict the */ /* period, rather than 6/12 this assumption may overestimate UO rate */ /* when documentation is done less than hourly */
35+
/* Use SECOND-based diff divided by 3600 for fractional hours, */
36+
/* ensuring consistent behavior between BigQuery and PostgreSQL */
37+
/* (see issue #1549). We compare <= 5 and <= 11 hours respectively. */
3638
SUM(
3739
CASE
38-
WHEN EXTRACT(EPOCH FROM io.charttime - iosum.charttime) / 3600.0 <= 5
40+
WHEN DATETIME_DIFF(io.charttime, iosum.charttime, 'SECOND') / 3600.0 <= 5
3941
THEN iosum.urineoutput
4042
ELSE NULL
4143
END
4244
) AS urineoutput_6hr,
4345
CAST(SUM(
4446
CASE
45-
WHEN EXTRACT(EPOCH FROM io.charttime - iosum.charttime) / 3600.0 <= 5
47+
WHEN DATETIME_DIFF(io.charttime, iosum.charttime, 'SECOND') / 3600.0 <= 5
4648
THEN iosum.tm_since_last_uo
4749
ELSE NULL
4850
END
4951
) AS DOUBLE PRECISION) / 60.0 AS uo_tm_6hr,
5052
SUM(
5153
CASE
52-
WHEN EXTRACT(EPOCH FROM io.charttime - iosum.charttime) / 3600.0 <= 11
54+
WHEN DATETIME_DIFF(io.charttime, iosum.charttime, 'SECOND') / 3600.0 <= 11
5355
THEN iosum.urineoutput
5456
ELSE NULL
5557
END
5658
) AS urineoutput_12hr,
5759
CAST(SUM(
5860
CASE
59-
WHEN EXTRACT(EPOCH FROM io.charttime - iosum.charttime) / 3600.0 <= 11
61+
WHEN DATETIME_DIFF(io.charttime, iosum.charttime, 'SECOND') / 3600.0 <= 11
6062
THEN iosum.tm_since_last_uo
6163
ELSE NULL
6264
END

mimic-iv/concepts_postgres/postgres-functions.sql

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,17 +73,21 @@ LANGUAGE PLPGSQL;
7373

7474
-- below requires a regex to convert datepart from primitive to a string
7575
-- i.e. encapsulate it in single quotes
76+
-- BigQuery's DATETIME_DIFF returns an integer (a count of datepart boundaries crossed), not a fractional value.
77+
-- For example, DATETIME_DIFF('10:30', '05:00', HOUR) returns 5 in BigQuery,
78+
-- not 5.5. We approximate this behavior using TRUNC to improve parity between
79+
-- the PostgreSQL and BigQuery implementations (see issue #1549). TRUNC can still differ from BigQuery: e.g. 05:59 -> 06:01 is 1 HOUR in BigQuery but 0 here.
7680
CREATE OR REPLACE FUNCTION DATETIME_DIFF(endtime TIMESTAMP(3), starttime TIMESTAMP(3), datepart TEXT) RETURNS NUMERIC AS $$
7781
BEGIN
78-
RETURN
79-
EXTRACT(EPOCH FROM endtime - starttime) /
82+
RETURN
83+
TRUNC(EXTRACT(EPOCH FROM endtime - starttime) /
8084
CASE
8185
WHEN datepart = 'SECOND' THEN 1.0
8286
WHEN datepart = 'MINUTE' THEN 60.0
8387
WHEN datepart = 'HOUR' THEN 3600.0
8488
WHEN datepart = 'DAY' THEN 24*3600.0
8589
WHEN datepart = 'YEAR' THEN 365.242*24*3600.0
86-
ELSE NULL END;
90+
ELSE NULL END);
8791
END; $$
8892
LANGUAGE PLPGSQL;
8993

0 commit comments

Comments
 (0)