Skip to content

Commit 18c427d

Browse files
committed
fix: use toJSONString for metric attribute hashing with JSON schema (#2087)
1 parent c4a1311 commit 18c427d

4 files changed

Lines changed: 467 additions & 7 deletions

File tree

packages/common-utils/src/__tests__/__snapshots__/renderChartConfig.test.ts.snap

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,200 @@ exports[`renderChartConfig HAVING clause should render HAVING clause with multip
2222
),count(),endpoint FROM default.metrics WHERE (timestamp >= fromUnixTimestamp64Milli(1739318400000) AND timestamp <= fromUnixTimestamp64Milli(1739491200000)) GROUP BY endpoint HAVING avg(response_time) > 500 AND count(*) > 10 SETTINGS optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"
2323
`;
2424

25+
exports[`renderChartConfig JSON schema (BETA_CH_OTEL_JSON_SCHEMA_ENABLED) should use toJSONString-based hash for gauge metric when Attributes column is JSON type 1`] = `
26+
"WITH Source AS (
27+
SELECT
28+
*,
29+
cityHash64(toJSONString(ScopeAttributes), toJSONString(ResourceAttributes), toJSONString(Attributes)) AS AttributesHash
30+
FROM default.otel_metrics_gauge
31+
WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) AND ((MetricName = 'system.cpu.utilization'))
32+
),Bucketed AS (
33+
SELECT
34+
toStartOfInterval(toDateTime(TimeUnix), INTERVAL 1 minute) AS \`__hdx_time_bucket2\`,
35+
AttributesHash,
36+
last_value(Value) AS LastValue,
37+
any(ScopeAttributes) AS ScopeAttributes,
38+
any(ResourceAttributes) AS ResourceAttributes,
39+
any(Attributes) AS Attributes,
40+
any(ResourceSchemaUrl) AS ResourceSchemaUrl,
41+
any(ScopeName) AS ScopeName,
42+
any(ScopeVersion) AS ScopeVersion,
43+
any(ScopeDroppedAttrCount) AS ScopeDroppedAttrCount,
44+
any(ScopeSchemaUrl) AS ScopeSchemaUrl,
45+
any(ServiceName) AS ServiceName,
46+
any(MetricDescription) AS MetricDescription,
47+
any(MetricUnit) AS MetricUnit,
48+
any(StartTimeUnix) AS StartTimeUnix,
49+
any(Flags) AS Flags
50+
FROM Source
51+
GROUP BY AttributesHash, __hdx_time_bucket2
52+
ORDER BY AttributesHash, __hdx_time_bucket2
53+
) SELECT avg(
54+
toFloat64OrDefault(toString(LastValue))
55+
),toStartOfInterval(toDateTime(__hdx_time_bucket2), INTERVAL 1 minute) AS \`__hdx_time_bucket\` FROM Bucketed WHERE (__hdx_time_bucket2 >= fromUnixTimestamp64Milli(1739318400000) AND __hdx_time_bucket2 <= fromUnixTimestamp64Milli(1765670400000)) GROUP BY toStartOfInterval(toDateTime(__hdx_time_bucket2), INTERVAL 1 minute) AS \`__hdx_time_bucket\` ORDER BY toStartOfInterval(toDateTime(__hdx_time_bucket2), INTERVAL 1 minute) AS \`__hdx_time_bucket\` LIMIT 10 SETTINGS short_circuit_function_evaluation = 'force_enable', optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"
56+
`;
57+
58+
exports[`renderChartConfig JSON schema (BETA_CH_OTEL_JSON_SCHEMA_ENABLED) should use toJSONString-based hash for histogram (count) metric when Attributes column is JSON type 1`] = `
59+
"WITH source AS (
60+
SELECT
61+
TimeUnix,
62+
AggregationTemporality,
63+
toStartOfInterval(toDateTime(TimeUnix), INTERVAL 2 minute) AS \`__hdx_time_bucket\`,
64+
65+
cityHash64(toJSONString(ScopeAttributes), toJSONString(ResourceAttributes), toJSONString(Attributes)) AS attr_hash,
66+
cityHash64(ExplicitBounds) AS bounds_hash,
67+
toInt64(Count) AS current_count,
68+
lagInFrame(toNullable(current_count), 1, NULL) OVER (
69+
PARTITION BY attr_hash, bounds_hash, AggregationTemporality
70+
ORDER BY TimeUnix
71+
) AS prev_count,
72+
CASE
73+
WHEN AggregationTemporality = 1 THEN current_count
74+
WHEN AggregationTemporality = 2 THEN greatest(0, current_count - coalesce(prev_count, 0))
75+
ELSE 0
76+
END AS delta
77+
FROM default.otel_metrics_histogram
78+
WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 2 minute) - INTERVAL 2 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 2 minute) + INTERVAL 2 minute) AND ((MetricName = 'http.server.request.count'))
79+
),metrics AS (
80+
SELECT
81+
\`__hdx_time_bucket\`,
82+
83+
sum(delta) AS \\"Value\\"
84+
FROM source
85+
GROUP BY \`__hdx_time_bucket\`
86+
) SELECT \`__hdx_time_bucket\`, \\"Value\\" FROM metrics WHERE (\`__hdx_time_bucket\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket\` <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10 SETTINGS short_circuit_function_evaluation = 'force_enable', optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"
87+
`;
88+
89+
exports[`renderChartConfig JSON schema (BETA_CH_OTEL_JSON_SCHEMA_ENABLED) should use toJSONString-based hash for histogram (quantile) metric when Attributes column is JSON type 1`] = `
90+
"WITH source AS (
91+
SELECT
92+
MetricName,
93+
ExplicitBounds,
94+
toStartOfInterval(toDateTime(TimeUnix), INTERVAL 2 minute) AS \`__hdx_time_bucket\`,
95+
96+
sumForEach(deltas) as rates
97+
FROM (
98+
SELECT
99+
TimeUnix,
100+
MetricName,
101+
ResourceAttributes,
102+
Attributes,
103+
ExplicitBounds,
104+
attr_hash,
105+
any(attr_hash) OVER (ROWS BETWEEN 1 preceding AND 1 preceding) AS prev_attr_hash,
106+
any(bounds_hash) OVER (ROWS BETWEEN 1 preceding AND 1 preceding) AS prev_bounds_hash,
107+
any(counts) OVER (ROWS BETWEEN 1 preceding AND 1 preceding) AS prev_counts,
108+
counts,
109+
IF(
110+
AggregationTemporality = 1
111+
OR prev_attr_hash != attr_hash
112+
OR bounds_hash != prev_bounds_hash
113+
OR arrayExists((x) -> x.2 < x.1, arrayZip(prev_counts, counts)),
114+
counts,
115+
counts - prev_counts
116+
) AS deltas
117+
FROM (
118+
SELECT
119+
TimeUnix,
120+
MetricName,
121+
AggregationTemporality,
122+
ExplicitBounds,
123+
ResourceAttributes,
124+
Attributes,
125+
cityHash64(toJSONString(ScopeAttributes), toJSONString(ResourceAttributes), toJSONString(Attributes)) AS attr_hash,
126+
cityHash64(ExplicitBounds) AS bounds_hash,
127+
CAST(BucketCounts AS Array(Int64)) counts
128+
FROM default.otel_metrics_histogram
129+
WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 2 minute) - INTERVAL 2 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 2 minute) + INTERVAL 2 minute) AND ((MetricName = 'http.server.duration'))
130+
ORDER BY attr_hash, TimeUnix ASC
131+
)
132+
)
133+
GROUP BY \`__hdx_time_bucket\`, MetricName, ExplicitBounds
134+
ORDER BY \`__hdx_time_bucket\`
135+
),points AS (
136+
SELECT
137+
\`__hdx_time_bucket\`,
138+
MetricName,
139+
140+
arrayZipUnaligned(arrayCumSum(rates), ExplicitBounds) as point,
141+
length(point) as n
142+
FROM source
143+
),metrics AS (
144+
SELECT
145+
\`__hdx_time_bucket\`,
146+
MetricName,
147+
148+
point[n].1 AS total,
149+
0.95 * total AS rank,
150+
arrayFirstIndex(x -> if(x.1 > rank, 1, 0), point) AS upper_idx,
151+
point[upper_idx].1 AS upper_count,
152+
ifNull(point[upper_idx].2, inf) AS upper_bound,
153+
CASE
154+
WHEN upper_idx > 1 THEN point[upper_idx - 1].2
155+
WHEN point[upper_idx].2 > 0 THEN 0
156+
ELSE inf
157+
END AS lower_bound,
158+
if (
159+
lower_bound = 0,
160+
0,
161+
point[upper_idx - 1].1
162+
) AS lower_count,
163+
CASE
164+
WHEN upper_bound = inf THEN point[upper_idx - 1].2
165+
WHEN lower_bound = inf THEN point[1].2
166+
ELSE lower_bound + (upper_bound - lower_bound) * ((rank - lower_count) / (upper_count - lower_count))
167+
END AS \\"Value\\"
168+
FROM points
169+
WHERE length(point) > 1 AND total > 0
170+
) SELECT \`__hdx_time_bucket\`, \\"Value\\" FROM metrics WHERE (\`__hdx_time_bucket\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket\` <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10 SETTINGS short_circuit_function_evaluation = 'force_enable', optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"
171+
`;
172+
173+
exports[`renderChartConfig JSON schema (BETA_CH_OTEL_JSON_SCHEMA_ENABLED) should use toJSONString-based hash for sum metric when Attributes column is JSON type 1`] = `
174+
"WITH Source AS (
175+
SELECT
176+
*,
177+
cityHash64(toJSONString(ScopeAttributes), toJSONString(ResourceAttributes), toJSONString(Attributes)) AS AttributesHash,
178+
IF(AggregationTemporality = 1,
179+
SUM(Value) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
180+
IF(IsMonotonic = 0,
181+
Value,
182+
deltaSum(Value) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
183+
)
184+
) AS Rate,
185+
IF(AggregationTemporality = 1, Rate, Value) AS Sum
186+
FROM default.otel_metrics_sum
187+
WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 5 minute) - INTERVAL 5 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 5 minute) + INTERVAL 5 minute) AND ((MetricName = 'db.client.connections.usage'))),Bucketed AS (
188+
SELECT
189+
toStartOfInterval(toDateTime(TimeUnix), INTERVAL 5 minute) AS \`__hdx_time_bucket2\`,
190+
AttributesHash,
191+
last_value(Source.Rate) AS \`__hdx_value_high\`,
192+
any(\`__hdx_value_high\`) OVER(PARTITION BY AttributesHash ORDER BY \`__hdx_time_bucket2\` ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS \`__hdx_value_high_prev\`,
193+
IF(IsMonotonic = 1, \`__hdx_value_high\` - \`__hdx_value_high_prev\`, \`__hdx_value_high\`) AS Rate,
194+
last_value(Source.Sum) AS Sum,
195+
any(ResourceAttributes) AS ResourceAttributes,
196+
any(ResourceSchemaUrl) AS ResourceSchemaUrl,
197+
any(ScopeName) AS ScopeName,
198+
any(ScopeVersion) AS ScopeVersion,
199+
any(ScopeAttributes) AS ScopeAttributes,
200+
any(ScopeDroppedAttrCount) AS ScopeDroppedAttrCount,
201+
any(ScopeSchemaUrl) AS ScopeSchemaUrl,
202+
any(ServiceName) AS ServiceName,
203+
any(MetricName) AS MetricName,
204+
any(MetricDescription) AS MetricDescription,
205+
any(MetricUnit) AS MetricUnit,
206+
any(Attributes) AS Attributes,
207+
any(StartTimeUnix) AS StartTimeUnix,
208+
any(Flags) AS Flags,
209+
any(AggregationTemporality) AS AggregationTemporality,
210+
any(IsMonotonic) AS IsMonotonic
211+
FROM Source
212+
GROUP BY AttributesHash, \`__hdx_time_bucket2\`
213+
ORDER BY AttributesHash, \`__hdx_time_bucket2\`
214+
) SELECT avg(
215+
toFloat64OrDefault(toString(Rate))
216+
) AS \\"Value\\",toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` FROM Bucketed WHERE (\`__hdx_time_bucket2\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket2\` <= fromUnixTimestamp64Milli(1765670400000)) GROUP BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` ORDER BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` LIMIT 10 SETTINGS optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"
217+
`;
218+
25219
exports[`renderChartConfig SETTINGS clause should apply the "chart config" settings to the query 1`] = `"SELECT histogramMerge(20)(Duration),severity FROM default.logs WHERE (timestamp >= fromUnixTimestamp64Milli(1739318400000) AND timestamp <= fromUnixTimestamp64Milli(1739491200000)) GROUP BY severity SETTINGS short_circuit_function_evaluation = 'force_enable', optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"`;
26220

27221
exports[`renderChartConfig SETTINGS clause should apply the "query settings" settings to the query 1`] = `"SELECT histogramMerge(20)(Duration),severity FROM default.logs WHERE (timestamp >= fromUnixTimestamp64Milli(1739318400000) AND timestamp <= fromUnixTimestamp64Milli(1739491200000)) GROUP BY severity SETTINGS optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"`;

0 commit comments

Comments
 (0)