Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion relay-conventions/sentry-conventions
73 changes: 73 additions & 0 deletions relay-event-normalization/src/eap/ai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,19 @@ fn normalize_ai_costs(attributes: &mut Attributes, model_metadata: Option<&Model
attributes.insert(GEN_AI__COST__INPUT_TOKENS, costs.input);
attributes.insert(GEN_AI__COST__OUTPUT_TOKENS, costs.output);
attributes.insert(GEN_AI__COST__TOTAL_TOKENS, costs.total());

attributes.insert(
GEN_AI__COST__CACHE_READ__INPUT_TOKENS,
costs.cache_read_input,
);
attributes.insert(
GEN_AI__COST__CACHE_CREATION__INPUT_TOKENS,
costs.cache_creation_input,
);
attributes.insert(
GEN_AI__COST__REASONING__OUTPUT_TOKENS,
costs.reasoning_output,
);
}

fn extract_string_value<'a>(attributes: &'a Attributes, key: &str) -> Option<&'a str> {
Expand Down Expand Up @@ -305,6 +318,14 @@ mod tests {

assert_annotated_snapshot!(attributes, @r#"
{
"gen_ai.cost.cache_creation.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.cache_read.input_tokens": {
"type": "double",
"value": 20.0
},
"gen_ai.cost.input_tokens": {
"type": "double",
"value": 25.0
Expand All @@ -313,6 +334,10 @@ mod tests {
"type": "double",
"value": 50.0
},
"gen_ai.cost.reasoning.output_tokens": {
"type": "double",
"value": 30.0
},
"gen_ai.cost.total_tokens": {
"type": "double",
"value": 75.0
Expand Down Expand Up @@ -374,6 +399,14 @@ mod tests {

assert_annotated_snapshot!(attributes, @r#"
{
"gen_ai.cost.cache_creation.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.cache_read.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.input_tokens": {
"type": "double",
"value": 90.0
Expand All @@ -382,6 +415,10 @@ mod tests {
"type": "double",
"value": 100.0
},
"gen_ai.cost.reasoning.output_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.total_tokens": {
"type": "double",
"value": 190.0
Expand Down Expand Up @@ -483,6 +520,14 @@ mod tests {

assert_annotated_snapshot!(attributes, @r#"
{
"gen_ai.cost.cache_creation.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.cache_read.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.input_tokens": {
"type": "double",
"value": 90.0
Expand All @@ -491,6 +536,10 @@ mod tests {
"type": "double",
"value": 100.0
},
"gen_ai.cost.reasoning.output_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.total_tokens": {
"type": "double",
"value": 190.0
Expand Down Expand Up @@ -552,6 +601,14 @@ mod tests {

assert_annotated_snapshot!(attributes, @r#"
{
"gen_ai.cost.cache_creation.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.cache_read.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.input_tokens": {
"type": "double",
"value": 90.0
Expand All @@ -560,6 +617,10 @@ mod tests {
"type": "double",
"value": 100.0
},
"gen_ai.cost.reasoning.output_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.total_tokens": {
"type": "double",
"value": 190.0
Expand Down Expand Up @@ -683,6 +744,14 @@ mod tests {
"type": "integer",
"value": 100000
},
"gen_ai.cost.cache_creation.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.cache_read.input_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.input_tokens": {
"type": "double",
"value": 300.0
Expand All @@ -691,6 +760,10 @@ mod tests {
"type": "double",
"value": 240.0
},
"gen_ai.cost.reasoning.output_tokens": {
"type": "double",
"value": 0.0
},
"gen_ai.cost.total_tokens": {
"type": "double",
"value": 540.0
Expand Down
29 changes: 25 additions & 4 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2568,6 +2568,9 @@ mod tests {
"gen_ai.cost.total_tokens": 50.0,
"gen_ai.cost.input_tokens": 10.0,
"gen_ai.cost.output_tokens": 40.0,
"gen_ai.cost.cache_read.input_tokens": 0.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 0.0,
"gen_ai.response.tokens_per_second": 62500.0,
"gen_ai.operation.type": "ai_client"
}
Expand All @@ -2582,6 +2585,9 @@ mod tests {
"gen_ai.cost.total_tokens": 80.0,
"gen_ai.cost.input_tokens": 20.0,
"gen_ai.cost.output_tokens": 60.0,
"gen_ai.cost.cache_read.input_tokens": 0.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 0.0,
"gen_ai.response.tokens_per_second": 62500.0,
"gen_ai.operation.type": "ai_client"
}
Expand Down Expand Up @@ -2687,14 +2693,17 @@ mod tests {
{
"gen_ai.usage.total_tokens": 3000.0,
"gen_ai.usage.input_tokens": 1000,
"gen_ai.usage.input_tokens.cached": 500,
"gen_ai.usage.cache_read.input_tokens": 500,
"gen_ai.usage.output_tokens": 2000,
"gen_ai.usage.output_tokens.reasoning": 1000,
"gen_ai.usage.reasoning.output_tokens": 1000,
"gen_ai.response.model": "claude-2.1",
"gen_ai.request.model": "claude-2.1",
"gen_ai.cost.total_tokens": 75.0,
"gen_ai.cost.input_tokens": 25.0,
"gen_ai.cost.output_tokens": 50.0,
"gen_ai.cost.cache_read.input_tokens": 20.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 30.0,
"gen_ai.response.tokens_per_second": 2000.0,
"gen_ai.operation.type": "ai_client"
}
Expand All @@ -2709,6 +2718,9 @@ mod tests {
"gen_ai.cost.total_tokens": 190.0,
"gen_ai.cost.input_tokens": 90.0,
"gen_ai.cost.output_tokens": 100.0,
"gen_ai.cost.cache_read.input_tokens": 0.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 0.0,
"gen_ai.response.tokens_per_second": 2000.0,
"gen_ai.operation.type": "ai_client"
}
Expand All @@ -2722,6 +2734,9 @@ mod tests {
"gen_ai.cost.total_tokens": 190.0,
"gen_ai.cost.input_tokens": 90.0,
"gen_ai.cost.output_tokens": 100.0,
"gen_ai.cost.cache_read.input_tokens": 0.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 0.0,
"gen_ai.response.tokens_per_second": 2000.0,
"gen_ai.operation.type": "ai_client"
}
Expand Down Expand Up @@ -2870,14 +2885,17 @@ mod tests {
{
"gen_ai.usage.total_tokens": 3000.0,
"gen_ai.usage.input_tokens": 1000,
"gen_ai.usage.input_tokens.cached": 500,
"gen_ai.usage.cache_read.input_tokens": 500,
"gen_ai.usage.output_tokens": 2000,
"gen_ai.usage.output_tokens.reasoning": 1000,
"gen_ai.usage.reasoning.output_tokens": 1000,
"gen_ai.response.model": "claude-2.1",
"gen_ai.request.model": "claude-2.1",
"gen_ai.cost.total_tokens": 65.0,
"gen_ai.cost.input_tokens": 25.0,
"gen_ai.cost.output_tokens": 40.0,
"gen_ai.cost.cache_read.input_tokens": 20.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 20.0,
"gen_ai.response.tokens_per_second": 62500.0,
"gen_ai.operation.type": "ai_client"
}
Expand All @@ -2892,6 +2910,9 @@ mod tests {
"gen_ai.cost.total_tokens": 190.0,
"gen_ai.cost.input_tokens": 90.0,
"gen_ai.cost.output_tokens": 100.0,
"gen_ai.cost.cache_read.input_tokens": 0.0,
"gen_ai.cost.cache_creation.input_tokens": 0.0,
"gen_ai.cost.reasoning.output_tokens": 0.0,
"gen_ai.response.tokens_per_second": 62500.0,
"gen_ai.operation.type": "ai_client"
}
Expand Down
67 changes: 51 additions & 16 deletions relay-event-normalization/src/normalize/span/ai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ impl UsedTokens {
Self {
input_tokens: get_value!(data.gen_ai_usage_input_tokens),
output_tokens: get_value!(data.gen_ai_usage_output_tokens),
output_reasoning_tokens: get_value!(data.gen_ai_usage_output_tokens_reasoning),
input_cached_tokens: get_value!(data.gen_ai_usage_input_tokens_cached),
input_cache_write_tokens: get_value!(data.gen_ai_usage_input_tokens_cache_write),
output_reasoning_tokens: get_value!(data.gen_ai_usage_reasoning_output_tokens),
input_cached_tokens: get_value!(data.gen_ai_usage_cache_read_input_tokens),
input_cache_write_tokens: get_value!(data.gen_ai_usage_cache_creation_input_tokens),
}
}

Expand Down Expand Up @@ -72,10 +72,16 @@ impl UsedTokens {
/// Calculated model call costs.
#[derive(Debug, Copy, Clone)]
pub struct CalculatedCost {
/// The cost of input tokens used.
/// The total cost of all input tokens (raw + cached + cache_write).
pub input: f64,
/// The cost of output tokens used.
/// The total cost of all output tokens (raw + reasoning).
pub output: f64,
/// The cost of cached input tokens only (subset of `input`).
pub cache_read_input: f64,
/// The cost of cache-write input tokens only (subset of `input`).
pub cache_creation_input: f64,
/// The cost of reasoning output tokens only (subset of `output`).
pub reasoning_output: f64,
}

impl CalculatedCost {
Expand Down Expand Up @@ -104,19 +110,21 @@ pub fn calculate_costs(
return None;
}

let cache_read_input = tokens.input_cached_tokens * model_cost.input_cached_per_token;
let cache_creation_input =
tokens.input_cache_write_tokens * model_cost.input_cache_write_per_token;
let input = (tokens.raw_input_tokens() * model_cost.input_per_token)
+ (tokens.input_cached_tokens * model_cost.input_cached_per_token)
+ (tokens.input_cache_write_tokens * model_cost.input_cache_write_per_token);
+ cache_read_input
+ cache_creation_input;

// For now most of the models do not differentiate between reasoning and output token cost,
// it costs the same.
let reasoning_cost = match model_cost.output_reasoning_per_token {
reasoning_cost if reasoning_cost > 0.0 => reasoning_cost,
let reasoning_per_token = match model_cost.output_reasoning_per_token {
r if r > 0.0 => r,
_ => model_cost.output_per_token,
};

let output = (tokens.raw_output_tokens() * model_cost.output_per_token)
+ (tokens.output_reasoning_tokens * reasoning_cost);
let reasoning_output = tokens.output_reasoning_tokens * reasoning_per_token;
let output = (tokens.raw_output_tokens() * model_cost.output_per_token) + reasoning_output;

let metric_label = match (input, output) {
(x, y) if x < 0.0 || y < 0.0 => "calculation_negative",
Expand All @@ -131,7 +139,13 @@ pub fn calculate_costs(
platform = platform,
);

Some(CalculatedCost { input, output })
Some(CalculatedCost {
input,
output,
cache_read_input,
cache_creation_input,
reasoning_output,
})
}

/// Default AI operation stored in [`GEN_AI__OPERATION__TYPE`](relay_conventions::attributes::GEN_AI__OPERATION__TYPE)
Expand Down Expand Up @@ -214,11 +228,17 @@ fn extract_ai_model_cost_data(
data.gen_ai_cost_total_tokens
.set_value(Value::F64(costs.total()).into());

// Set individual cost components
data.gen_ai_cost_input_tokens
.set_value(Value::F64(costs.input).into());
data.gen_ai_cost_output_tokens
.set_value(Value::F64(costs.output).into());

data.gen_ai_cost_cache_read_input_tokens
.set_value(Value::F64(costs.cache_read_input).into());
data.gen_ai_cost_cache_creation_input_tokens
.set_value(Value::F64(costs.cache_creation_input).into());
data.gen_ai_cost_reasoning_output_tokens
.set_value(Value::F64(costs.reasoning_output).into());
}

/// Maps AI-related measurements (legacy) to span data.
Expand Down Expand Up @@ -531,6 +551,9 @@ mod tests {
CalculatedCost {
input: 5.5,
output: 39.0,
cache_read_input: 2.5,
cache_creation_input: 0.0,
reasoning_output: 27.0,
}
");
}
Expand Down Expand Up @@ -562,6 +585,9 @@ mod tests {
CalculatedCost {
input: 5.5,
output: 30.0,
cache_read_input: 2.5,
cache_creation_input: 0.0,
reasoning_output: 18.0,
}
");
}
Expand Down Expand Up @@ -595,6 +621,9 @@ mod tests {
CalculatedCost {
input: -9.0,
output: -7.0,
cache_read_input: 11.0,
cache_creation_input: 0.0,
reasoning_output: 9.0,
}
");
}
Expand Down Expand Up @@ -628,6 +657,9 @@ mod tests {
CalculatedCost {
input: 82.5,
output: 110.0,
cache_read_input: 10.0,
cache_creation_input: 22.5,
reasoning_output: 30.0,
}
");
}
Expand All @@ -637,9 +669,9 @@ mod tests {
// Test that cost calculation works when cache_write field is missing (backward compatibility)
let span_data = SpanData {
gen_ai_usage_input_tokens: Annotated::new(100.0.into()),
gen_ai_usage_input_tokens_cached: Annotated::new(20.0.into()),
gen_ai_usage_cache_read_input_tokens: Annotated::new(20.0.into()),
gen_ai_usage_output_tokens: Annotated::new(50.0.into()),
// Note: gen_ai_usage_input_tokens_cache_write is NOT set (simulating old data)
// Note: gen_ai_usage_cache_creation_input_tokens is NOT set (simulating old data)
..Default::default()
};

Expand Down Expand Up @@ -669,6 +701,9 @@ mod tests {
CalculatedCost {
input: 90.0,
output: 100.0,
cache_read_input: 10.0,
cache_creation_input: 0.0,
reasoning_output: 0.0,
}
");
}
Expand Down
Loading