relay_event_normalization/normalize/span/
ai.rs

1//! AI cost calculation.
2
3use crate::normalize::AiOperationTypeMap;
4use crate::{ModelCostV2, ModelCosts};
5use relay_event_schema::protocol::{Event, Span, SpanData};
6use relay_protocol::{Annotated, Getter, Value};
7
8/// Calculates the cost of an AI model based on the model cost and the tokens used.
9/// Calculated cost is in US dollars.
10fn extract_ai_model_cost_data(model_cost: Option<&ModelCostV2>, data: &mut SpanData) {
11    let cost_per_token = match model_cost {
12        Some(v) => v,
13        None => return,
14    };
15
16    let input_tokens_used = data
17        .gen_ai_usage_input_tokens
18        .value()
19        .and_then(Value::as_f64);
20
21    let output_tokens_used = data
22        .gen_ai_usage_output_tokens
23        .value()
24        .and_then(Value::as_f64);
25    let output_reasoning_tokens_used = data
26        .gen_ai_usage_output_tokens_reasoning
27        .value()
28        .and_then(Value::as_f64);
29    let input_cached_tokens_used = data
30        .gen_ai_usage_input_tokens_cached
31        .value()
32        .and_then(Value::as_f64);
33
34    if input_tokens_used.is_none() && output_tokens_used.is_none() {
35        return;
36    }
37
38    let mut input_cost = 0.0;
39    let mut output_cost = 0.0;
40    // Cached tokens are subset of the input tokens, so we need to subtract them
41    // from the input tokens
42    input_cost += cost_per_token.input_per_token
43        * (input_tokens_used.unwrap_or(0.0) - input_cached_tokens_used.unwrap_or(0.0));
44    input_cost += cost_per_token.input_cached_per_token * input_cached_tokens_used.unwrap_or(0.0);
45    // Reasoning tokens are subset of the output tokens, so we need to subtract
46    // them from the output tokens
47    output_cost += cost_per_token.output_per_token
48        * (output_tokens_used.unwrap_or(0.0) - output_reasoning_tokens_used.unwrap_or(0.0));
49
50    if cost_per_token.output_reasoning_per_token > 0.0 {
51        // for now most of the models do not differentiate between reasoning and output token cost,
52        // it costs the same
53        output_cost +=
54            cost_per_token.output_reasoning_per_token * output_reasoning_tokens_used.unwrap_or(0.0);
55    } else {
56        output_cost +=
57            cost_per_token.output_per_token * output_reasoning_tokens_used.unwrap_or(0.0);
58    }
59
60    let result = input_cost + output_cost;
61    // double write during migration period
62    // 'gen_ai_usage_total_cost' is deprecated and will be removed in the future
63    data.gen_ai_usage_total_cost
64        .set_value(Value::F64(result).into());
65    data.gen_ai_cost_total_tokens
66        .set_value(Value::F64(result).into());
67
68    // Set individual cost components
69    data.gen_ai_cost_input_tokens
70        .set_value(Value::F64(input_cost).into());
71    data.gen_ai_cost_output_tokens
72        .set_value(Value::F64(output_cost).into());
73}
74
75/// Maps AI-related measurements (legacy) to span data.
76fn map_ai_measurements_to_data(span: &mut Span) {
77    let measurements = span.measurements.value();
78    let data = span.data.get_or_insert_with(SpanData::default);
79
80    let set_field_from_measurement = |target_field: &mut Annotated<Value>,
81                                      measurement_key: &str| {
82        if let Some(measurements) = measurements
83            && target_field.value().is_none()
84            && let Some(value) = measurements.get_value(measurement_key)
85        {
86            target_field.set_value(Value::F64(value.to_f64()).into());
87        }
88    };
89
90    set_field_from_measurement(&mut data.gen_ai_usage_total_tokens, "ai_total_tokens_used");
91    set_field_from_measurement(&mut data.gen_ai_usage_input_tokens, "ai_prompt_tokens_used");
92    set_field_from_measurement(
93        &mut data.gen_ai_usage_output_tokens,
94        "ai_completion_tokens_used",
95    );
96
97    // It might be that 'total_tokens' is not set in which case we need to calculate it
98    if data.gen_ai_usage_total_tokens.value().is_none() {
99        let input_tokens = data
100            .gen_ai_usage_input_tokens
101            .value()
102            .and_then(Value::as_f64);
103        let output_tokens = data
104            .gen_ai_usage_output_tokens
105            .value()
106            .and_then(Value::as_f64);
107
108        if input_tokens.is_none() && output_tokens.is_none() {
109            // don't set total_tokens if there are no input nor output tokens
110            return;
111        }
112
113        data.gen_ai_usage_total_tokens.set_value(
114            Value::F64(input_tokens.unwrap_or(0.0) + output_tokens.unwrap_or(0.0)).into(),
115        );
116    }
117}
118
119/// Extract the additional data into the span
120fn extract_ai_data(span: &mut Span, ai_model_costs: &ModelCosts) {
121    let duration = span
122        .get_value("span.duration")
123        .and_then(|v| v.as_f64())
124        .unwrap_or(0.0);
125
126    let data = span.data.get_or_insert_with(SpanData::default);
127
128    // Extracts the response tokens per second
129    if data.gen_ai_response_tokens_per_second.value().is_none()
130        && duration > 0.0
131        && let Some(output_tokens) = data
132            .gen_ai_usage_output_tokens
133            .value()
134            .and_then(Value::as_f64)
135    {
136        data.gen_ai_response_tokens_per_second
137            .set_value(Value::F64(output_tokens / (duration / 1000.0)).into());
138    }
139
140    // Extracts the total cost of the AI model used
141    if let Some(model_id) = data
142        .gen_ai_request_model
143        .value()
144        .and_then(|val| val.as_str())
145        .or_else(|| {
146            data.gen_ai_response_model
147                .value()
148                .and_then(|val| val.as_str())
149        })
150    {
151        extract_ai_model_cost_data(ai_model_costs.cost_per_token(model_id), data)
152    }
153}
154
155/// Enrich the AI span data
156pub fn enrich_ai_span_data(
157    span: &mut Span,
158    model_costs: Option<&ModelCosts>,
159    operation_type_map: Option<&AiOperationTypeMap>,
160) {
161    if !is_ai_span(span) {
162        return;
163    }
164
165    map_ai_measurements_to_data(span);
166    if let Some(model_costs) = model_costs {
167        extract_ai_data(span, model_costs);
168    }
169    if let Some(operation_type_map) = operation_type_map {
170        infer_ai_operation_type(span, operation_type_map);
171    }
172}
173
174/// Extract the ai data from all of an event's spans
175pub fn enrich_ai_event_data(
176    event: &mut Event,
177    model_costs: Option<&ModelCosts>,
178    operation_type_map: Option<&AiOperationTypeMap>,
179) {
180    let spans = event.spans.value_mut().iter_mut().flatten();
181    let spans = spans.filter_map(|span| span.value_mut().as_mut());
182
183    for span in spans {
184        enrich_ai_span_data(span, model_costs, operation_type_map);
185    }
186}
187
188///  Infer AI operation type mapping to a span.
189///
190/// This function maps span.op values to gen_ai.operation.type based on the provided
191/// operation type map configuration.
192fn infer_ai_operation_type(span: &mut Span, operation_type_map: &AiOperationTypeMap) {
193    let data = span.data.get_or_insert_with(SpanData::default);
194
195    if let Some(op) = span.op.value()
196        && let Some(operation_type) = operation_type_map.get_operation_type(op)
197    {
198        data.gen_ai_operation_type
199            .set_value(Some(operation_type.to_owned()));
200    }
201}
202
203/// Returns true if the span is an AI span.
204/// AI spans are spans with op starting with "ai." (legacy) or "gen_ai." (new).
205fn is_ai_span(span: &Span) -> bool {
206    span.op
207        .value()
208        .is_some_and(|op| op.starts_with("ai.") || op.starts_with("gen_ai."))
209}