relay_event_normalization/normalize/
utils.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
//! **Deprecated.** Utilities for extracting common event fields.
//!
//! This utility module is being phased out. Functionality in this module should be moved to the
//! specific normalization file requiring this data access.

use std::f64::consts::SQRT_2;

use relay_event_schema::protocol::{Event, ResponseContext, Span, TraceContext, User};
use relay_protocol::Value;

/// Used to decide when to extract mobile-specific tags.
///
/// Contains the four SDK identifiers that are considered mobile SDKs.
// NOTE(review): presumably compared against the SDK name reported on the event; confirm with
// the callers, which are not part of this module.
pub const MOBILE_SDKS: [&str; 4] = [
    "sentry.cocoa",
    "sentry.dart.flutter",
    "sentry.java.android",
    "sentry.javascript.react-native",
];

/// Allowed value for main thread name.
pub const MAIN_THREAD_NAME: &str = "main";

/// Maximum length of a mobile span or measurement in milliseconds.
///
/// The limit is 180 000 ms, i.e. three minutes.
///
/// Spans like `ui.load` with an `exclusive_time` that exceeds this number will be removed,
/// as well as mobile measurements (on transactions) such as `app.start.cold`, etc.
pub const MAX_DURATION_MOBILE_MS: f64 = 180_000.0;

/// Extracts the HTTP status code from a span.
///
/// Two locations are consulted, in this order:
///
/// 1. The `http_response_status_code` field of the span data. String values are used as-is,
///    signed and unsigned integer values are rendered as decimal strings.
/// 2. The `http.status_code` entry of the span tags.
///
/// If the span data carries a value of any other type, it is ignored and the span tags are
/// still consulted. Returns `None` if neither location yields a usable status code.
pub fn http_status_code_from_span(span: &Span) -> Option<String> {
    // For SDKs which put the HTTP status code into the span data.
    let from_data = span
        .data
        .value()
        .and_then(|data| data.http_response_status_code.value())
        // `and_then` rather than `map`: a value of an unsupported type must not short-circuit
        // the lookup, otherwise the tag fallback below would never run.
        .and_then(|value| match value {
            Value::String(s) => Some(s.as_str().to_owned()),
            Value::I64(i) => Some(i.to_string()),
            Value::U64(u) => Some(u.to_string()),
            _ => None,
        });

    // For SDKs which put the HTTP status code into the span tags.
    from_data.or_else(|| {
        span.tags
            .value()
            .and_then(|tags| tags.get("http.status_code"))
            .and_then(|tag| tag.as_str())
            .map(str::to_owned)
    })
}

/// Extracts the HTTP status code.
pub fn extract_http_status_code(event: &Event) -> Option<String> {
    // For SDKs which put the HTTP status code in the event tags.
    if let Some(status_code) = event.tag_value("http.status_code") {
        return Some(status_code.to_owned());
    }

    if let Some(spans) = event.spans.value() {
        for span in spans {
            if let Some(span_value) = span.value() {
                if let Some(status_code) = http_status_code_from_span(span_value) {
                    return Some(status_code);
                }
            }
        }
    }

    // For SDKs which put the HTTP status code into the breadcrumbs data.
    if let Some(breadcrumbs) = event.breadcrumbs.value() {
        if let Some(values) = breadcrumbs.values.value() {
            for breadcrumb in values {
                // We need only the `http` type.
                if let Some(crumb) = breadcrumb
                    .value()
                    .filter(|bc| bc.ty.as_str() == Some("http"))
                {
                    // Try to get the status code om the map.
                    if let Some(status_code) = crumb.data.value().and_then(|v| v.get("status_code"))
                    {
                        return status_code.value().and_then(|v| v.as_str()).map(Into::into);
                    }
                }
            }
        }
    }

    // For SDKs which put the HTTP status code in the `Response` context.
    if let Some(response_context) = event.context::<ResponseContext>() {
        let status_code = response_context
            .status_code
            .value()
            .map(|code| code.to_string());
        return status_code;
    }

    None
}

/// Computes the "user" tag of a transaction event, mirroring as closely as possible how users
/// are determined in the transactions dataset in Snuba.
///
/// The result should yield the exact same user counts as the `user` column in Discover for
/// Transactions, barring:
///
/// * imprecision caused by HLL sketching in Snuba, which we don't have in events
/// * hash collisions in `BucketValue::set_from_display`, which we don't have in events
/// * MD5-collisions caused by `EventUser.hash_from_tag`, which we don't have in metrics
///
///   MD5 is used to efficiently look up the current event user for an event, and if there is a
///   collision it seems that this code will fetch an event user with potentially different values
///   for everything that is in `defaults`:
///   <https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/event_manager.py#L1058-L1060>
///
/// The performance product runs a discover query such as `count_unique(user)`, which maps to two
/// things:
///
/// * `user` metric for the metrics dataset
/// * the "promoted tag" column `user` in the transactions clickhouse table
///
/// A promoted tag is a tag that snuba pulls out into its own column. In this case it pulls out the
/// `sentry:user` tag from the event payload:
/// <https://github.com/getsentry/snuba/blob/430763e67e30957c89126e62127e34051eb52fd6/snuba/datasets/transactions_processor.py#L151>
///
/// Sentry's processing pipeline defers to `sentry.models.EventUser` to produce the `sentry:user` tag
/// here: <https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/event_manager.py#L790-L794>
///
/// `sentry.models.eventuser.KEYWORD_MAP` determines which attributes are looked up in which order, here:
/// <https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/models/eventuser.py#L18>
/// If its order is changed, this function needs to be changed.
///
/// The first attribute that is set wins, checked in the order `id`, `username`, `email`,
/// `ip_address`. The tag has the form `<prefix>:<value>` with the prefixes `id`, `username`,
/// `email` and `ip` respectively. Returns `None` if none of the attributes is set.
pub fn get_event_user_tag(user: &User) -> Option<String> {
    // Each fallback is evaluated lazily, so the precedence matches the documented order.
    user.id
        .as_str()
        .map(|id| format!("id:{id}"))
        .or_else(|| {
            user.username
                .as_str()
                .map(|username| format!("username:{username}"))
        })
        .or_else(|| user.email.as_str().map(|email| format!("email:{email}")))
        .or_else(|| {
            user.ip_address
                .as_str()
                .map(|ip_address| format!("ip:{ip_address}"))
        })
}

/// Returns a normalized `op` from the given trace context.
///
/// Yields `None` when the trace context has no `op`, or when the `op` is the literal
/// `"default"`; otherwise the `op` is returned as an owned string.
pub fn extract_transaction_op(trace_context: &TraceContext) -> Option<String> {
    trace_context
        .op
        .value()
        // A "default" op was likely set by normalization, so let's treat it as None.
        // See https://github.com/getsentry/relay/blob/bb2ac4ee82c25faa07a6d078f93d22d799cfc5d1/relay-general/src/store/transactions.rs#L96
        //
        // Note that this is the opposite behavior of what we do for transaction.status, where
        // we coalesce None to "unknown".
        .filter(|op| *op != "default")
        .map(|op| op.to_string())
}

/// Approximates the Gauss error function.
///
/// This is a numerical approximation (Abramowitz & Stegun, formula 7.1.26), not an exact
/// evaluation: the result for `0.0` is very close to, but not exactly, zero. The function is
/// computed for `|x|` and the sign of `x` is applied to the result afterwards.
///
/// See <https://en.wikipedia.org/wiki/Error_function>.
fn erf(x: f64) -> f64 {
    // Polynomial coefficients, highest order first, so they can be folded with Horner's scheme.
    const COEFFICIENTS: [f64; 5] = [
        1.061405429,
        -1.453152027,
        1.421413741,
        -0.284496736,
        0.254829592,
    ];
    const P: f64 = 0.3275911;

    let magnitude = x.abs();

    // A&S formula 7.1.26
    let t = 1.0 / (1.0 + P * magnitude);
    let polynomial = COEFFICIENTS
        .iter()
        .fold(0.0, |acc, coefficient| acc * t + coefficient);
    let approximation = 1.0 - polynomial * t * (-magnitude * magnitude).exp();

    // Restore the sign of the original argument.
    if x < 0.0 {
        -approximation
    } else {
        approximation
    }
}

/// Sigma function for CDF score calculation.
///
/// Computes the absolute difference of the natural logarithms of `p10` and `p50`, divided by
/// `SQRT_2 * 0.9061938024368232` (approximately `1.2816`). Because of the absolute value, the
/// result does not depend on the order of the two arguments.
// NOTE(review): the divisor looks like the magnitude of the standard normal z-score at the
// 10th percentile; confirm against the scoring specification.
fn calculate_cdf_sigma(p10: f64, p50: f64) -> f64 {
    let log_distance = (f64::ln(p10) - f64::ln(p50)).abs();
    let scale = SQRT_2 * 0.9061938024368232;
    log_distance / scale
}

/// Calculates a log-normal CDF score based on a log-normal with a specific p10 and p50.
///
/// The score is `0.5 * (1 - erf(z))`, where `z` is the difference of the natural logarithms
/// of `value` and `p50`, divided by `SQRT_2` times the sigma derived from `p10` and `p50`.
pub fn calculate_cdf_score(value: f64, p10: f64, p50: f64) -> f64 {
    // The percentiles are passed in reverse of the helper's parameter order; this is harmless
    // because the helper takes the absolute value of the log difference.
    let sigma = calculate_cdf_sigma(p50, p10);
    let z = (f64::ln(value) - f64::ln(p50)) / (SQRT_2 * sigma);
    0.5 * (1.0 - erf(z))
}

#[cfg(test)]
mod tests {
    use crate::utils::{get_event_user_tag, http_status_code_from_span};
    use relay_event_schema::protocol::{Span, User};
    use relay_protocol::Annotated;

    /// Verifies the attribute precedence `id` > `username` > `email` > `ip_address` by clearing
    /// the highest-priority attribute after each assertion.
    #[test]
    fn test_get_event_user_tag() {
        // Note: If this order changes,
        // https://github.com/getsentry/sentry/blob/f621cd76da3a39836f34802ba9b35133bdfbe38b/src/sentry/models/eventuser.py#L18
        // has to be changed. Though it is probably not a good idea!
        let mut user = User {
            id: Annotated::new("ident".to_owned().into()),
            username: Annotated::new("username".to_owned().into()),
            email: Annotated::new("email".to_owned()),
            ip_address: Annotated::new("127.0.0.1".parse().unwrap()),
            ..User::default()
        };
        assert_eq!(get_event_user_tag(&user).as_deref(), Some("id:ident"));

        user.id = Annotated::empty();
        assert_eq!(
            get_event_user_tag(&user).as_deref(),
            Some("username:username")
        );

        user.username = Annotated::empty();
        assert_eq!(get_event_user_tag(&user).as_deref(), Some("email:email"));

        user.email = Annotated::empty();
        assert_eq!(get_event_user_tag(&user).as_deref(), Some("ip:127.0.0.1"));

        user.ip_address = Annotated::empty();
        assert_eq!(get_event_user_tag(&user), None);
    }

    /// An integer status code in the span data is rendered as a decimal string.
    #[test]
    fn test_extracts_http_status_code_when_int() {
        let payload = r#"{
                "data": {
                    "http.response.status_code": 400
                }
            }"#;
        let span = Annotated::<Span>::from_json(payload)
            .unwrap()
            .into_value()
            .unwrap();

        assert_eq!(http_status_code_from_span(&span).as_deref(), Some("400"));
    }
}