From 436f68a795ca68f67dcdef5a3bbf7e371af9e7f4 Mon Sep 17 00:00:00 2001 From: Vjeran Grozdanic Date: Thu, 2 Jul 2026 23:16:11 +0200 Subject: [PATCH] feat(ai): Rename deprecated span attrs to OTEL names and add granular cost fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename three SpanData usage fields to their new OTEL semantic convention names, keeping the old names as legacy_alias for backward compatibility: - gen_ai.usage.input_tokens.cached → gen_ai.usage.cache_read.input_tokens - gen_ai.usage.input_tokens.cache_write → gen_ai.usage.cache_creation.input_tokens - gen_ai.usage.output_tokens.reasoning → gen_ai.usage.reasoning.output_tokens Add three new granular cost fields mirroring the usage token breakdown: - gen_ai.cost.cache_read.input_tokens - gen_ai.cost.cache_creation.input_tokens - gen_ai.cost.reasoning.output_tokens Co-Authored-By: Claude Opus 4.6 (1M context) --- relay-conventions/sentry-conventions | 2 +- relay-event-normalization/src/eap/ai.rs | 73 +++++++++++++++++++ relay-event-normalization/src/event.rs | 29 +++++++- .../src/normalize/span/ai.rs | 67 +++++++++++++---- relay-event-schema/src/protocol/span.rs | 46 +++++++++--- .../src/protocol/span/convert.rs | 9 ++- 6 files changed, 191 insertions(+), 35 deletions(-) diff --git a/relay-conventions/sentry-conventions b/relay-conventions/sentry-conventions index daee5ba1c58..d97c519f04f 160000 --- a/relay-conventions/sentry-conventions +++ b/relay-conventions/sentry-conventions @@ -1 +1 @@ -Subproject commit daee5ba1c580ff841db03b0decfb1b0250ad1edf +Subproject commit d97c519f04f2402bd1f68e21a31ffa43f9e3fec3 diff --git a/relay-event-normalization/src/eap/ai.rs b/relay-event-normalization/src/eap/ai.rs index 2d875a2f05c..b2e5a1125d5 100644 --- a/relay-event-normalization/src/eap/ai.rs +++ b/relay-event-normalization/src/eap/ai.rs @@ -208,6 +208,19 @@ fn normalize_ai_costs(attributes: &mut Attributes, model_metadata: Option<&Model 
attributes.insert(GEN_AI__COST__INPUT_TOKENS, costs.input); attributes.insert(GEN_AI__COST__OUTPUT_TOKENS, costs.output); attributes.insert(GEN_AI__COST__TOTAL_TOKENS, costs.total()); + + attributes.insert( + GEN_AI__COST__CACHE_READ__INPUT_TOKENS, + costs.cache_read_input, + ); + attributes.insert( + GEN_AI__COST__CACHE_CREATION__INPUT_TOKENS, + costs.cache_creation_input, + ); + attributes.insert( + GEN_AI__COST__REASONING__OUTPUT_TOKENS, + costs.reasoning_output, + ); } fn extract_string_value<'a>(attributes: &'a Attributes, key: &str) -> Option<&'a str> { @@ -305,6 +318,14 @@ mod tests { assert_annotated_snapshot!(attributes, @r#" { + "gen_ai.cost.cache_creation.input_tokens": { + "type": "double", + "value": 0.0 + }, + "gen_ai.cost.cache_read.input_tokens": { + "type": "double", + "value": 20.0 + }, "gen_ai.cost.input_tokens": { "type": "double", "value": 25.0 @@ -313,6 +334,10 @@ mod tests { "type": "double", "value": 50.0 }, + "gen_ai.cost.reasoning.output_tokens": { + "type": "double", + "value": 30.0 + }, "gen_ai.cost.total_tokens": { "type": "double", "value": 75.0 @@ -374,6 +399,14 @@ mod tests { assert_annotated_snapshot!(attributes, @r#" { + "gen_ai.cost.cache_creation.input_tokens": { + "type": "double", + "value": 0.0 + }, + "gen_ai.cost.cache_read.input_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.input_tokens": { "type": "double", "value": 90.0 @@ -382,6 +415,10 @@ mod tests { "type": "double", "value": 100.0 }, + "gen_ai.cost.reasoning.output_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.total_tokens": { "type": "double", "value": 190.0 @@ -483,6 +520,14 @@ mod tests { assert_annotated_snapshot!(attributes, @r#" { + "gen_ai.cost.cache_creation.input_tokens": { + "type": "double", + "value": 0.0 + }, + "gen_ai.cost.cache_read.input_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.input_tokens": { "type": "double", "value": 90.0 @@ -491,6 +536,10 @@ mod tests { "type": "double", "value": 100.0 }, + 
"gen_ai.cost.reasoning.output_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.total_tokens": { "type": "double", "value": 190.0 @@ -552,6 +601,14 @@ mod tests { assert_annotated_snapshot!(attributes, @r#" { + "gen_ai.cost.cache_creation.input_tokens": { + "type": "double", + "value": 0.0 + }, + "gen_ai.cost.cache_read.input_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.input_tokens": { "type": "double", "value": 90.0 @@ -560,6 +617,10 @@ mod tests { "type": "double", "value": 100.0 }, + "gen_ai.cost.reasoning.output_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.total_tokens": { "type": "double", "value": 190.0 @@ -683,6 +744,14 @@ mod tests { "type": "integer", "value": 100000 }, + "gen_ai.cost.cache_creation.input_tokens": { + "type": "double", + "value": 0.0 + }, + "gen_ai.cost.cache_read.input_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.input_tokens": { "type": "double", "value": 300.0 @@ -691,6 +760,10 @@ mod tests { "type": "double", "value": 240.0 }, + "gen_ai.cost.reasoning.output_tokens": { + "type": "double", + "value": 0.0 + }, "gen_ai.cost.total_tokens": { "type": "double", "value": 540.0 diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 16674c9c88f..43d2951ef65 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -2568,6 +2568,9 @@ mod tests { "gen_ai.cost.total_tokens": 50.0, "gen_ai.cost.input_tokens": 10.0, "gen_ai.cost.output_tokens": 40.0, + "gen_ai.cost.cache_read.input_tokens": 0.0, + "gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 0.0, "gen_ai.response.tokens_per_second": 62500.0, "gen_ai.operation.type": "ai_client" } @@ -2582,6 +2585,9 @@ mod tests { "gen_ai.cost.total_tokens": 80.0, "gen_ai.cost.input_tokens": 20.0, "gen_ai.cost.output_tokens": 60.0, + "gen_ai.cost.cache_read.input_tokens": 0.0, + 
"gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 0.0, "gen_ai.response.tokens_per_second": 62500.0, "gen_ai.operation.type": "ai_client" } @@ -2687,14 +2693,17 @@ mod tests { { "gen_ai.usage.total_tokens": 3000.0, "gen_ai.usage.input_tokens": 1000, - "gen_ai.usage.input_tokens.cached": 500, + "gen_ai.usage.cache_read.input_tokens": 500, "gen_ai.usage.output_tokens": 2000, - "gen_ai.usage.output_tokens.reasoning": 1000, + "gen_ai.usage.reasoning.output_tokens": 1000, "gen_ai.response.model": "claude-2.1", "gen_ai.request.model": "claude-2.1", "gen_ai.cost.total_tokens": 75.0, "gen_ai.cost.input_tokens": 25.0, "gen_ai.cost.output_tokens": 50.0, + "gen_ai.cost.cache_read.input_tokens": 20.0, + "gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 30.0, "gen_ai.response.tokens_per_second": 2000.0, "gen_ai.operation.type": "ai_client" } @@ -2709,6 +2718,9 @@ mod tests { "gen_ai.cost.total_tokens": 190.0, "gen_ai.cost.input_tokens": 90.0, "gen_ai.cost.output_tokens": 100.0, + "gen_ai.cost.cache_read.input_tokens": 0.0, + "gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 0.0, "gen_ai.response.tokens_per_second": 2000.0, "gen_ai.operation.type": "ai_client" } @@ -2722,6 +2734,9 @@ mod tests { "gen_ai.cost.total_tokens": 190.0, "gen_ai.cost.input_tokens": 90.0, "gen_ai.cost.output_tokens": 100.0, + "gen_ai.cost.cache_read.input_tokens": 0.0, + "gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 0.0, "gen_ai.response.tokens_per_second": 2000.0, "gen_ai.operation.type": "ai_client" } @@ -2870,14 +2885,17 @@ mod tests { { "gen_ai.usage.total_tokens": 3000.0, "gen_ai.usage.input_tokens": 1000, - "gen_ai.usage.input_tokens.cached": 500, + "gen_ai.usage.cache_read.input_tokens": 500, "gen_ai.usage.output_tokens": 2000, - "gen_ai.usage.output_tokens.reasoning": 1000, + "gen_ai.usage.reasoning.output_tokens": 1000, 
"gen_ai.response.model": "claude-2.1", "gen_ai.request.model": "claude-2.1", "gen_ai.cost.total_tokens": 65.0, "gen_ai.cost.input_tokens": 25.0, "gen_ai.cost.output_tokens": 40.0, + "gen_ai.cost.cache_read.input_tokens": 20.0, + "gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 20.0, "gen_ai.response.tokens_per_second": 62500.0, "gen_ai.operation.type": "ai_client" } @@ -2892,6 +2910,9 @@ mod tests { "gen_ai.cost.total_tokens": 190.0, "gen_ai.cost.input_tokens": 90.0, "gen_ai.cost.output_tokens": 100.0, + "gen_ai.cost.cache_read.input_tokens": 0.0, + "gen_ai.cost.cache_creation.input_tokens": 0.0, + "gen_ai.cost.reasoning.output_tokens": 0.0, "gen_ai.response.tokens_per_second": 62500.0, "gen_ai.operation.type": "ai_client" } diff --git a/relay-event-normalization/src/normalize/span/ai.rs b/relay-event-normalization/src/normalize/span/ai.rs index 3a38b3c993e..58ec08d9b89 100644 --- a/relay-event-normalization/src/normalize/span/ai.rs +++ b/relay-event-normalization/src/normalize/span/ai.rs @@ -40,9 +40,9 @@ impl UsedTokens { Self { input_tokens: get_value!(data.gen_ai_usage_input_tokens), output_tokens: get_value!(data.gen_ai_usage_output_tokens), - output_reasoning_tokens: get_value!(data.gen_ai_usage_output_tokens_reasoning), - input_cached_tokens: get_value!(data.gen_ai_usage_input_tokens_cached), - input_cache_write_tokens: get_value!(data.gen_ai_usage_input_tokens_cache_write), + output_reasoning_tokens: get_value!(data.gen_ai_usage_reasoning_output_tokens), + input_cached_tokens: get_value!(data.gen_ai_usage_cache_read_input_tokens), + input_cache_write_tokens: get_value!(data.gen_ai_usage_cache_creation_input_tokens), } } @@ -72,10 +72,16 @@ impl UsedTokens { /// Calculated model call costs. #[derive(Debug, Copy, Clone)] pub struct CalculatedCost { - /// The cost of input tokens used. + /// The total cost of all input tokens (raw + cached + cache_write). pub input: f64, - /// The cost of output tokens used. 
+ /// The total cost of all output tokens (raw + reasoning). pub output: f64, + /// The cost of cached input tokens only (subset of `input`). + pub cache_read_input: f64, + /// The cost of cache-write input tokens only (subset of `input`). + pub cache_creation_input: f64, + /// The cost of reasoning output tokens only (subset of `output`). + pub reasoning_output: f64, } impl CalculatedCost { @@ -104,19 +110,21 @@ pub fn calculate_costs( return None; } + let cache_read_input = tokens.input_cached_tokens * model_cost.input_cached_per_token; + let cache_creation_input = + tokens.input_cache_write_tokens * model_cost.input_cache_write_per_token; let input = (tokens.raw_input_tokens() * model_cost.input_per_token) - + (tokens.input_cached_tokens * model_cost.input_cached_per_token) - + (tokens.input_cache_write_tokens * model_cost.input_cache_write_per_token); + + cache_read_input + + cache_creation_input; // For now most of the models do not differentiate between reasoning and output token cost, // it costs the same. 
- let reasoning_cost = match model_cost.output_reasoning_per_token { - reasoning_cost if reasoning_cost > 0.0 => reasoning_cost, + let reasoning_per_token = match model_cost.output_reasoning_per_token { + r if r > 0.0 => r, _ => model_cost.output_per_token, }; - - let output = (tokens.raw_output_tokens() * model_cost.output_per_token) - + (tokens.output_reasoning_tokens * reasoning_cost); + let reasoning_output = tokens.output_reasoning_tokens * reasoning_per_token; + let output = (tokens.raw_output_tokens() * model_cost.output_per_token) + reasoning_output; let metric_label = match (input, output) { (x, y) if x < 0.0 || y < 0.0 => "calculation_negative", @@ -131,7 +139,13 @@ pub fn calculate_costs( platform = platform, ); - Some(CalculatedCost { input, output }) + Some(CalculatedCost { + input, + output, + cache_read_input, + cache_creation_input, + reasoning_output, + }) } /// Default AI operation stored in [`GEN_AI__OPERATION__TYPE`](relay_conventions::attributes::GEN_AI__OPERATION__TYPE) @@ -214,11 +228,17 @@ fn extract_ai_model_cost_data( data.gen_ai_cost_total_tokens .set_value(Value::F64(costs.total()).into()); - // Set individual cost components data.gen_ai_cost_input_tokens .set_value(Value::F64(costs.input).into()); data.gen_ai_cost_output_tokens .set_value(Value::F64(costs.output).into()); + + data.gen_ai_cost_cache_read_input_tokens + .set_value(Value::F64(costs.cache_read_input).into()); + data.gen_ai_cost_cache_creation_input_tokens + .set_value(Value::F64(costs.cache_creation_input).into()); + data.gen_ai_cost_reasoning_output_tokens + .set_value(Value::F64(costs.reasoning_output).into()); } /// Maps AI-related measurements (legacy) to span data. 
@@ -531,6 +551,9 @@ mod tests { CalculatedCost { input: 5.5, output: 39.0, + cache_read_input: 2.5, + cache_creation_input: 0.0, + reasoning_output: 27.0, } "); } @@ -562,6 +585,9 @@ mod tests { CalculatedCost { input: 5.5, output: 30.0, + cache_read_input: 2.5, + cache_creation_input: 0.0, + reasoning_output: 18.0, } "); } @@ -595,6 +621,9 @@ mod tests { CalculatedCost { input: -9.0, output: -7.0, + cache_read_input: 11.0, + cache_creation_input: 0.0, + reasoning_output: 9.0, } "); } @@ -628,6 +657,9 @@ mod tests { CalculatedCost { input: 82.5, output: 110.0, + cache_read_input: 10.0, + cache_creation_input: 22.5, + reasoning_output: 30.0, } "); } @@ -637,9 +669,9 @@ mod tests { // Test that cost calculation works when cache_write field is missing (backward compatibility) let span_data = SpanData { gen_ai_usage_input_tokens: Annotated::new(100.0.into()), - gen_ai_usage_input_tokens_cached: Annotated::new(20.0.into()), + gen_ai_usage_cache_read_input_tokens: Annotated::new(20.0.into()), gen_ai_usage_output_tokens: Annotated::new(50.0.into()), - // Note: gen_ai_usage_input_tokens_cache_write is NOT set (simulating old data) + // Note: gen_ai_usage_cache_creation_input_tokens is NOT set (simulating old data) ..Default::default() }; @@ -669,6 +701,9 @@ mod tests { CalculatedCost { input: 90.0, output: 100.0, + cache_read_input: 10.0, + cache_creation_input: 0.0, + reasoning_output: 0.0, } "); } diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index b5bfb337339..f28a7765296 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -494,12 +494,18 @@ pub struct SpanData { /// The input tokens used by an LLM call that were cached /// (cheaper and faster than non-cached input tokens) - #[metastructure(field = "gen_ai.usage.input_tokens.cached")] - pub gen_ai_usage_input_tokens_cached: Annotated<Value>, + #[metastructure( + field = "gen_ai.usage.cache_read.input_tokens", + legacy_alias = 
"gen_ai.usage.input_tokens.cached" + )] + pub gen_ai_usage_cache_read_input_tokens: Annotated<Value>, /// The input tokens written to cache during an LLM call - #[metastructure(field = "gen_ai.usage.input_tokens.cache_write")] - pub gen_ai_usage_input_tokens_cache_write: Annotated<Value>, + #[metastructure( + field = "gen_ai.usage.cache_creation.input_tokens", + legacy_alias = "gen_ai.usage.input_tokens.cache_write" + )] + pub gen_ai_usage_cache_creation_input_tokens: Annotated<Value>, /// The output tokens used by an LLM call (the ones the LLM actually generated) #[metastructure( @@ -511,8 +517,11 @@ pub struct SpanData { /// The output tokens used to represent the model's internal thought /// process while generating a response - #[metastructure(field = "gen_ai.usage.output_tokens.reasoning")] - pub gen_ai_usage_output_tokens_reasoning: Annotated<Value>, + #[metastructure( + field = "gen_ai.usage.reasoning.output_tokens", + legacy_alias = "gen_ai.usage.output_tokens.reasoning" + )] + pub gen_ai_usage_reasoning_output_tokens: Annotated<Value>, // Exact model used to generate the response (e.g. gpt-4o-mini-2024-07-18) #[metastructure(field = "gen_ai.response.model")] @@ -542,6 +551,18 @@ pub struct SpanData { #[metastructure(field = "gen_ai.cost.output_tokens")] pub gen_ai_cost_output_tokens: Annotated<Value>, + /// The cost of cached input tokens in USD + #[metastructure(field = "gen_ai.cost.cache_read.input_tokens")] + pub gen_ai_cost_cache_read_input_tokens: Annotated<Value>, + + /// The cost of input tokens written to cache in USD + #[metastructure(field = "gen_ai.cost.cache_creation.input_tokens")] + pub gen_ai_cost_cache_creation_input_tokens: Annotated<Value>, + + /// The cost of reasoning output tokens in USD + #[metastructure(field = "gen_ai.cost.reasoning.output_tokens")] + pub gen_ai_cost_reasoning_output_tokens: Annotated<Value>, + /// The input messages to the model call. 
#[metastructure( field = "gen_ai.input.messages", @@ -1441,16 +1462,16 @@ mod tests { .unwrap() .into_value() .unwrap(); - insta::assert_debug_snapshot!(data, @r###" + insta::assert_debug_snapshot!(data, @r#" SpanData { app_start_type: ~, gen_ai_pipeline_name: ~, gen_ai_usage_total_tokens: ~, gen_ai_usage_input_tokens: ~, - gen_ai_usage_input_tokens_cached: ~, - gen_ai_usage_input_tokens_cache_write: ~, + gen_ai_usage_cache_read_input_tokens: ~, + gen_ai_usage_cache_creation_input_tokens: ~, gen_ai_usage_output_tokens: ~, - gen_ai_usage_output_tokens_reasoning: ~, + gen_ai_usage_reasoning_output_tokens: ~, gen_ai_response_model: ~, gen_ai_request_model: ~, gen_ai_context_window_size: ~, @@ -1458,6 +1479,9 @@ mod tests { gen_ai_cost_total_tokens: ~, gen_ai_cost_input_tokens: ~, gen_ai_cost_output_tokens: ~, + gen_ai_cost_cache_read_input_tokens: ~, + gen_ai_cost_cache_creation_input_tokens: ~, + gen_ai_cost_reasoning_output_tokens: ~, gen_ai_input_messages: ~, gen_ai_tool_call_arguments: ~, gen_ai_tool_call_result: ~, @@ -1576,7 +1600,7 @@ mod tests { ), }, } - "###); + "#); assert_eq!(data.get_value("foo"), Some(Val::U64(2))); assert_eq!(data.get_value("bar"), Some(Val::String("3"))); diff --git a/relay-event-schema/src/protocol/span/convert.rs b/relay-event-schema/src/protocol/span/convert.rs index 878a0e919ab..dc7df7a1228 100644 --- a/relay-event-schema/src/protocol/span/convert.rs +++ b/relay-event-schema/src/protocol/span/convert.rs @@ -170,10 +170,10 @@ mod tests { gen_ai_pipeline_name: ~, gen_ai_usage_total_tokens: ~, gen_ai_usage_input_tokens: ~, - gen_ai_usage_input_tokens_cached: ~, - gen_ai_usage_input_tokens_cache_write: ~, + gen_ai_usage_cache_read_input_tokens: ~, + gen_ai_usage_cache_creation_input_tokens: ~, gen_ai_usage_output_tokens: ~, - gen_ai_usage_output_tokens_reasoning: ~, + gen_ai_usage_reasoning_output_tokens: ~, gen_ai_response_model: ~, gen_ai_request_model: ~, gen_ai_context_window_size: ~, @@ -181,6 +181,9 @@ mod tests { 
gen_ai_cost_total_tokens: ~, gen_ai_cost_input_tokens: ~, gen_ai_cost_output_tokens: ~, + gen_ai_cost_cache_read_input_tokens: ~, + gen_ai_cost_cache_creation_input_tokens: ~, + gen_ai_cost_reasoning_output_tokens: ~, gen_ai_input_messages: ~, gen_ai_tool_call_arguments: ~, gen_ai_tool_call_result: ~,