relay_event_normalization/normalize/utils.rs
1//! **Deprecated.** Utilities for extracting common event fields.
2//!
3//! This utility module is being phased out. Functionality in this module should be moved to the
4//! specific normalization file requiring this data access.
5
6use std::f64::consts::SQRT_2;
7
8use relay_event_schema::protocol::{Event, ResponseContext, Span, TraceContext, User};
9use relay_protocol::Value;
10
/// SDK names that are treated as mobile SDKs.
///
/// Used to decide when to extract mobile-specific tags.
pub const MOBILE_SDKS: [&str; 4] = [
    "sentry.cocoa",
    "sentry.dart.flutter",
    "sentry.java.android",
    "sentry.javascript.react-native",
];
18
/// Allowed value for main thread name.
///
/// This is the literal thread name `"main"`.
pub const MAIN_THREAD_NAME: &str = "main";
21
/// Maximum duration of a mobile measurement in milliseconds.
///
/// Mobile measurements (app start, TTID, TTFD) that exceed this threshold are considered
/// outliers and removed.
///
/// The value corresponds to 180 seconds (three minutes).
pub const MAX_DURATION_MOBILE_MS: f64 = 180_000.0;
27
28/// Extract the HTTP status code from the span data.
29pub fn http_status_code_from_span(span: &Span) -> Option<String> {
30 // For SDKs which put the HTTP status code into the span data.
31 if let Some(status_code) = span
32 .data
33 .value()
34 .and_then(|data| data.http_response_status_code.value())
35 .map(|v| match v {
36 Value::String(s) => Some(s.as_str().to_owned()),
37 Value::I64(i) => Some(i.to_string()),
38 Value::U64(u) => Some(u.to_string()),
39 _ => None,
40 })
41 {
42 return status_code;
43 }
44
45 // For SDKs which put the HTTP status code into the span tags.
46 if let Some(status_code) = span
47 .tags
48 .value()
49 .and_then(|tags| tags.get("http.status_code"))
50 .and_then(|v| v.as_str())
51 .map(|v| v.to_owned())
52 {
53 return Some(status_code);
54 }
55
56 None
57}
58
59/// Extracts the HTTP status code.
60pub fn extract_http_status_code(event: &Event) -> Option<String> {
61 // For SDKs which put the HTTP status code in the event tags.
62 if let Some(status_code) = event.tag_value("http.status_code") {
63 return Some(status_code.to_owned());
64 }
65
66 if let Some(spans) = event.spans.value() {
67 for span in spans {
68 if let Some(span_value) = span.value()
69 && let Some(status_code) = http_status_code_from_span(span_value)
70 {
71 return Some(status_code);
72 }
73 }
74 }
75
76 // For SDKs which put the HTTP status code into the breadcrumbs data.
77 if let Some(breadcrumbs) = event.breadcrumbs.value()
78 && let Some(values) = breadcrumbs.values.value()
79 {
80 for breadcrumb in values {
81 // We need only the `http` type.
82 if let Some(crumb) = breadcrumb
83 .value()
84 .filter(|bc| bc.ty.as_str() == Some("http"))
85 {
86 // Try to get the status code om the map.
87 if let Some(status_code) = crumb.data.value().and_then(|v| v.get("status_code")) {
88 return status_code.value().and_then(|v| v.as_str()).map(Into::into);
89 }
90 }
91 }
92 }
93
94 // For SDKs which put the HTTP status code in the `Response` context.
95 if let Some(response_context) = event.context::<ResponseContext>() {
96 let status_code = response_context
97 .status_code
98 .value()
99 .map(|code| code.to_string());
100 return status_code;
101 }
102
103 None
104}
105
106/// Compute the transaction event's "user" tag as close as possible to how users are determined in
107/// the transactions dataset in Snuba. This should produce the exact same user counts as the `user`
108/// column in Discover for Transactions, barring:
109///
110/// * imprecision caused by HLL sketching in Snuba, which we don't have in events
111/// * hash collisions in `BucketValue::set_from_display`, which we don't have in events
112/// * MD5-collisions caused by `EventUser.hash_from_tag`, which we don't have in metrics
113///
114/// MD5 is used to efficiently look up the current event user for an event, and if there is a
115/// collision it seems that this code will fetch an event user with potentially different values
116/// for everything that is in `defaults`:
117/// <https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/event_manager.py#L1058-L1060>
118///
119/// The performance product runs a discover query such as `count_unique(user)`, which maps to two
120/// things:
121///
122/// * `user` metric for the metrics dataset
123/// * the "promoted tag" column `user` in the transactions clickhouse table
124///
125/// A promoted tag is a tag that snuba pulls out into its own column. In this case it pulls out the
126/// `sentry:user` tag from the event payload:
127/// <https://github.com/getsentry/snuba/blob/430763e67e30957c89126e62127e34051eb52fd6/snuba/datasets/transactions_processor.py#L151>
128///
129/// Sentry's processing pipeline defers to `sentry.models.EventUser` to produce the `sentry:user` tag
130/// here: <https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/event_manager.py#L790-L794>
131///
132/// `sentry.models.eventuser.KEYWORD_MAP` determines which attributes are looked up in which order, here:
133/// <https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/models/eventuser.py#L18>
134/// If its order is changed, this function needs to be changed.
135pub fn get_event_user_tag(user: &User) -> Option<String> {
136 if let Some(id) = user.id.as_str() {
137 return Some(format!("id:{id}"));
138 }
139
140 if let Some(username) = user.username.as_str() {
141 return Some(format!("username:{username}"));
142 }
143
144 if let Some(email) = user.email.as_str() {
145 return Some(format!("email:{email}"));
146 }
147
148 if let Some(ip_address) = user.ip_address.as_str() {
149 return Some(format!("ip:{ip_address}"));
150 }
151
152 None
153}
154
155/// Returns a normalized `op` from the given trace context.
156pub fn extract_transaction_op(trace_context: &TraceContext) -> Option<String> {
157 let op = trace_context.op.value()?;
158 if op == "default" {
159 // This was likely set by normalization, so let's treat it as None
160 // See https://github.com/getsentry/relay/blob/bb2ac4ee82c25faa07a6d078f93d22d799cfc5d1/relay-general/src/store/transactions.rs#L96
161
162 // Note that this is the opposite behavior of what we do for transaction.status, where
163 // we coalesce None to "unknown".
164 return None;
165 }
166 Some(op.to_string())
167}
168
/// The Gauss error function.
///
/// This is a numerical approximation using formula 7.1.26 from Abramowitz & Stegun, so results
/// are close to, but not exactly, the true value (for example, `erf(0.0)` is a tiny positive
/// number rather than zero).
///
/// See <https://en.wikipedia.org/wiki/Error_function>.
fn erf(x: f64) -> f64 {
    // Polynomial coefficients of A&S formula 7.1.26, ordered a5, a4, a3, a2, a1 so that they can
    // be folded with Horner's scheme.
    const COEFFICIENTS: [f64; 5] = [
        1.061405429,
        -1.453152027,
        1.421413741,
        -0.284496736,
        0.254829592,
    ];
    const P: f64 = 0.3275911;

    // The formula is evaluated on |x|; the sign is restored at the end.
    let magnitude = x.abs();
    let t = 1.0 / (1.0 + P * magnitude);
    let polynomial = COEFFICIENTS
        .iter()
        .fold(0.0, |acc, coefficient| acc * t + *coefficient);
    let approximation = 1.0 - polynomial * t * (-magnitude * magnitude).exp();

    if x < 0.0 {
        -approximation
    } else {
        approximation
    }
}
188
/// Sigma function for CDF score calculation.
///
/// Derives the standard deviation of the logarithm of a log-normally distributed variable from
/// its 10th and 50th percentile. The result only depends on the distance between `p10` and `p50`
/// in log space, so it is symmetric in its two arguments.
fn calculate_cdf_sigma(p10: f64, p50: f64) -> f64 {
    // Distance between the two percentiles in log space.
    let log_distance = (p10.ln() - p50.ln()).abs();
    // sqrt(2) * 0.9061938024368232 is approximately 1.2816, the magnitude of the standard normal
    // quantile at the 10th percentile.
    let quantile_distance = SQRT_2 * 0.9061938024368232;

    log_distance / quantile_distance
}
193
194/// Computes the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function)
195/// of a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution) with the given p10 and p50.
196///
197/// In other words, if `X` is log-normally distributed with 10th and 50th percentile `p10` and `p50`,
198/// then `calculate_cdf_score(x, p10, p50) = P(X ≤ x)`.
199pub fn calculate_cdf_score(value: f64, p10: f64, p50: f64) -> f64 {
200 0.5 * (1.0 - erf((f64::ln(value) - f64::ln(p50)) / (SQRT_2 * calculate_cdf_sigma(p50, p10))))
201}
202
// Unit tests for the user tag computation and the span status code extraction.
#[cfg(test)]
mod tests {
    use crate::utils::{get_event_user_tag, http_status_code_from_span};
    use relay_event_schema::protocol::{Span, User};
    use relay_protocol::Annotated;

    /// Checks that the user tag is built from the first available attribute, in the order
    /// `id`, `username`, `email`, `ip_address`, and that a user without any of them has no tag.
    #[test]
    fn test_get_event_user_tag() {
        // Note: If this order changes,
        // https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/models/eventuser.py#L18
        // has to be changed. Though it is probably not a good idea!

        // All attributes set: the id wins.
        let user = User {
            id: Annotated::new("ident".to_owned().into()),
            username: Annotated::new("username".to_owned().into()),
            email: Annotated::new("email".to_owned()),
            ip_address: Annotated::new("127.0.0.1".parse().unwrap()),
            ..User::default()
        };

        assert_eq!(get_event_user_tag(&user).unwrap(), "id:ident");

        // No id: the username wins.
        let user = User {
            username: Annotated::new("username".to_owned().into()),
            email: Annotated::new("email".to_owned()),
            ip_address: Annotated::new("127.0.0.1".parse().unwrap()),
            ..User::default()
        };

        assert_eq!(get_event_user_tag(&user).unwrap(), "username:username");

        // No id and no username: the email wins.
        let user = User {
            email: Annotated::new("email".to_owned()),
            ip_address: Annotated::new("127.0.0.1".parse().unwrap()),
            ..User::default()
        };

        assert_eq!(get_event_user_tag(&user).unwrap(), "email:email");

        // Only the IP address is set.
        let user = User {
            ip_address: Annotated::new("127.0.0.1".parse().unwrap()),
            ..User::default()
        };

        assert_eq!(get_event_user_tag(&user).unwrap(), "ip:127.0.0.1");

        // None of the attributes is set: there is no tag.
        let user = User::default();

        assert!(get_event_user_tag(&user).is_none());
    }

    /// Checks that an integer status code in the span data is returned as a string.
    #[test]
    fn test_extracts_http_status_code_when_int() {
        let span = Annotated::<Span>::from_json(
            r#"{
 "data": {
 "http.response.status_code": 400
 }
 }"#,
        )
        .unwrap()
        .into_value()
        .unwrap();

        let result = http_status_code_from_span(&span);

        assert_eq!(result, Some("400".to_owned()));
    }
}
270}