relay_event_normalization/normalize/
request.rs

//! Normalization of the [`Request`] interface.
//!
//! See [`normalize_request`] for more information.
4
5use std::sync::OnceLock;
6
7use regex::Regex;
8use relay_event_schema::processor::{self, ProcessingAction, ProcessingResult};
9use relay_event_schema::protocol::{Cookies, Query, Request};
10use relay_protocol::{Annotated, ErrorKind, Meta, Value};
11use url::Url;
12
/// The Unicode horizontal ellipsis (U+2026).
///
/// `normalize_url` replaces this character with three ASCII dots when it terminates a URL.
const ELLIPSIS: char = '\u{2026}';
14
15fn normalize_url(request: &mut Request) {
16    let url_string = match request.url.value_mut() {
17        Some(url_string) => url_string,
18        None => return,
19    };
20
21    // Special case: JavaScript SDK used to send an ellipsis character for
22    // truncated URLs. Canonical URLs do not contain UTF-8 characters in
23    // either the path, query string or fragment, so we replace it with
24    // three dots (which is the behavior of other SDKs). This effectively
25    // makes the string two characters longer, but it will be trimmed
26    // again later if it is too long in the end.
27    if url_string.ends_with(ELLIPSIS) {
28        url_string.truncate(url_string.len() - ELLIPSIS.len_utf8());
29        url_string.push_str("...");
30    }
31
32    match Url::parse(url_string) {
33        Ok(mut url) => {
34            // Separate the query string and fragment bits into dedicated fields. If
35            // both the URL and the fields have been set, the fields take precedence.
36            if request.query_string.value().is_none() {
37                let query: Query = url.query_pairs().collect();
38                if !query.is_empty() {
39                    request.query_string.set_value(Some(query));
40                }
41            }
42
43            if request.fragment.value().is_none() {
44                request
45                    .fragment
46                    .set_value(url.fragment().map(str::to_string));
47            }
48
49            url.set_query(None);
50            url.set_fragment(None);
51            if url.as_str() != url_string {
52                *url_string = url.into();
53            }
54        }
55        Err(_) => {
56            // The URL is invalid, but we can still apply heuristics to parse the query
57            // string and put the fragment in its own field.
58            if let Some(fragment_index) = url_string.find('#') {
59                let fragment = &url_string[fragment_index + 1..];
60                if !fragment.is_empty() && request.fragment.value().is_none() {
61                    request.fragment.set_value(Some(fragment.to_string()));
62                }
63                url_string.truncate(fragment_index);
64            }
65
66            if let Some(query_index) = url_string.find('?') {
67                let query_string = &url_string[query_index + 1..];
68                if !query_string.is_empty() && request.query_string.value().is_none() {
69                    let query = Query::parse(query_string);
70                    if !query.is_empty() {
71                        request.query_string.set_value(Some(query));
72                    }
73                }
74                url_string.truncate(query_index);
75            }
76        }
77    };
78}
79
80#[allow(clippy::ptr_arg)] // normalize_method must be &mut String for `apply`.
81fn normalize_method(method: &mut String, meta: &mut Meta) -> ProcessingResult {
82    method.make_ascii_uppercase();
83
84    static METHOD_RE: OnceLock<Regex> = OnceLock::new();
85    let regex = METHOD_RE.get_or_init(|| Regex::new(r"^[A-Z\-_]{3,32}$").unwrap());
86
87    if !meta.has_errors() && !regex.is_match(method) {
88        meta.add_error(ErrorKind::InvalidData);
89        return Err(ProcessingAction::DeleteValueSoft);
90    }
91
92    Ok(())
93}
94
95/// Decodes an urlencoded body.
96fn urlencoded_from_str(raw: &str) -> Option<Value> {
97    // Binary strings would be decoded, but we know url-encoded bodies are ASCII.
98    if !raw.is_ascii() {
99        return None;
100    }
101
102    // Avoid false positives with XML and partial JSON.
103    if raw.starts_with("<?xml") || raw.starts_with('{') || raw.starts_with('[') {
104        return None;
105    }
106
107    // serde_urlencoded always deserializes into `Value::Object`.
108    let object = match serde_urlencoded::from_str(raw) {
109        Ok(Value::Object(value)) => value,
110        _ => return None,
111    };
112
113    // `serde_urlencoded` can decode any string with valid characters into an object. However, we
114    // need to account for false-positives in the following cases:
115    //  - An empty string "" is decoded as empty object
116    //  - A string "foo" is decoded as {"foo": ""} (check for single empty value)
117    //  - A base64 encoded string "dGU=" also decodes with a single empty value
118    //  - A base64 encoded string "dA==" decodes as {"dA": "="} (check for single =)
119    let is_valid = object.len() > 1
120        || object
121            .values()
122            .next()
123            .and_then(Annotated::<Value>::as_str)
124            .is_some_and(|s| !matches!(s, "" | "="));
125
126    if is_valid {
127        Some(Value::Object(object))
128    } else {
129        None
130    }
131}
132
133fn parse_raw_data(request: &Request) -> Option<(&'static str, Value)> {
134    let raw = request.data.as_str()?;
135
136    // TODO: Try to decode base64 first
137
138    if let Ok(value) = serde_json::from_str(raw) {
139        Some(("application/json", value))
140    } else {
141        urlencoded_from_str(raw).map(|value| ("application/x-www-form-urlencoded", value))
142    }
143}
144
145fn normalize_data(request: &mut Request) {
146    // Always derive the `inferred_content_type` from the request body, even if there is a
147    // `Content-Type` header present. This value can technically be ingested (due to the schema) but
148    // should always be overwritten in normalization. Only if inference fails, fall back to the
149    // content type header.
150    if let Some((content_type, parsed_data)) = parse_raw_data(request) {
151        // Retain meta data on the body (e.g. trimming annotations) but remove anything on the
152        // inferred content type.
153        request.data.set_value(Some(parsed_data));
154        request.inferred_content_type = Annotated::from(content_type.to_string());
155    } else {
156        request.inferred_content_type = request
157            .headers
158            .value()
159            .and_then(|headers| headers.get_header("Content-Type"))
160            .map(|value| value.split(';').next().unwrap_or(value).to_string())
161            .into();
162    }
163}
164
165fn normalize_cookies(request: &mut Request) {
166    let headers = match request.headers.value_mut() {
167        Some(headers) => headers,
168        None => return,
169    };
170
171    if request.cookies.value().is_some() {
172        headers.remove("Cookie");
173        return;
174    }
175
176    let cookie_header = match headers.get_header("Cookie") {
177        Some(header) => header,
178        None => return,
179    };
180
181    if let Ok(new_cookies) = Cookies::parse(cookie_header) {
182        request.cookies = Annotated::from(new_cookies);
183        headers.remove("Cookie");
184    }
185}
186
187/// Normalizes the [`Request`] interface.
188///
189/// This function applies the following normalization rules:
190/// - The URL is truncated to 2048 characters.
191/// - The query string and fragment are extracted into dedicated fields.
192/// - The method is normalized to uppercase.
193/// - The data is parsed as JSON or urlencoded and put into the `data` field.
194/// - The `Content-Type` header is parsed and put into the `inferred_content_type` field.
195/// - The `Cookie` header is parsed and put into the `cookies` field.
196pub fn normalize_request(request: &mut Request) {
197    let _ = processor::apply(&mut request.method, normalize_method);
198    normalize_url(request);
199    normalize_data(request);
200    normalize_cookies(request);
201}
202
#[cfg(test)]
mod tests {
    use relay_event_schema::protocol::{Headers, PairList};
    use relay_protocol::Object;
    use similar_asserts::assert_eq;

    use super::*;

    /// Creates an otherwise empty request with the given URL.
    fn request_with_url(url: &str) -> Request {
        Request {
            url: Annotated::new(url.to_string()),
            ..Request::default()
        }
    }

    /// Creates an otherwise empty request with the given method.
    fn request_with_method(method: &str) -> Request {
        Request {
            method: Annotated::new(method.to_string()),
            ..Request::default()
        }
    }

    /// Creates an otherwise empty request with the given raw string body.
    fn request_with_body(body: &str) -> Request {
        Request {
            data: Annotated::from(Value::String(body.to_string())),
            ..Request::default()
        }
    }

    /// Runs normalization and hands the request back for assertions.
    fn normalized(mut request: Request) -> Request {
        normalize_request(&mut request);
        request
    }

    /// Creates a query string with a single key-value pair.
    fn single_query(key: &str, value: &str) -> Annotated<Query> {
        Annotated::new(Query(PairList(vec![Annotated::new((
            Annotated::new(key.to_string()),
            Annotated::new(value.to_string().into()),
        ))])))
    }

    /// Creates a header list with a single header.
    fn single_header(name: &str, value: &str) -> Annotated<Headers> {
        Annotated::new(Headers(PairList(vec![Annotated::new((
            Annotated::new(name.to_string().into()),
            Annotated::new(value.to_string().into()),
        ))])))
    }

    /// Creates a cookie list from name-value pairs.
    fn cookies(pairs: &[(&str, &str)]) -> Annotated<Cookies> {
        let entries: Vec<_> = pairs
            .iter()
            .map(|&(name, value)| {
                Annotated::new((
                    Annotated::new(name.to_string()),
                    Annotated::new(value.to_string()),
                ))
            })
            .collect();

        Annotated::new(Cookies(PairList(entries)))
    }

    /// The structured value `{"foo": "bar"}`.
    fn foo_bar_object() -> Value {
        let mut object = Object::new();
        object.insert(
            "foo".to_string(),
            Annotated::from(Value::String("bar".into())),
        );
        Value::Object(object)
    }

    /// Asserts that the body is left untouched and no content type is inferred for it.
    fn assert_body_not_inferred(body: &str) {
        let request = normalized(request_with_body(body));
        assert_eq!(request.inferred_content_type.value(), None);
        assert_eq!(request.data.as_str(), Some(body));
    }

    #[test]
    fn test_url_truncation() {
        let request = normalized(request_with_url("http://example.com/path?foo#bar"));
        assert_eq!(request.url.as_str(), Some("http://example.com/path"));
    }

    #[test]
    fn test_url_truncation_reversed() {
        // The query string is empty and the fragment is "foo?bar" here
        let request = normalized(request_with_url("http://example.com/path#foo?bar"));
        assert_eq!(request.url.as_str(), Some("http://example.com/path"));
    }

    #[test]
    fn test_url_with_ellipsis() {
        let request = normalized(request_with_url("http://example.com/path…"));
        assert_eq!(request.url.as_str(), Some("http://example.com/path..."));
    }

    #[test]
    fn test_url_with_qs_and_fragment() {
        let request = normalized(request_with_url("http://example.com/path?some=thing#else"));

        let expected = Request {
            query_string: single_query("some", "thing"),
            fragment: Annotated::new("else".to_string()),
            ..request_with_url("http://example.com/path")
        };
        assert_eq!(request, expected);
    }

    #[test]
    fn test_url_only_path() {
        let request = normalized(request_with_url("metamask/popup.html#"));
        assert_eq!(request, request_with_url("metamask/popup.html"));
    }

    #[test]
    fn test_url_punycoded() {
        let request = normalized(request_with_url("http://göögle.com/"));
        assert_eq!(request, request_with_url("http://xn--ggle-5qaa.com/"));
    }

    #[test]
    fn test_url_precedence() {
        let request = normalized(Request {
            query_string: single_query("some", "thing"),
            fragment: Annotated::new("else".to_string()),
            ..request_with_url("http://example.com/path?completely=different#stuff")
        });

        let expected = Request {
            query_string: single_query("some", "thing"),
            fragment: Annotated::new("else".to_string()),
            ..request_with_url("http://example.com/path")
        };
        assert_eq!(request, expected);
    }

    #[test]
    fn test_query_string_empty_value() {
        let request = normalized(request_with_url("http://example.com/path?some"));

        let expected = Request {
            query_string: single_query("some", ""),
            ..request_with_url("http://example.com/path")
        };
        assert_eq!(request, expected);
    }

    #[test]
    fn test_cookies_in_header() {
        let request = normalized(Request {
            headers: single_header("Cookie", "a=b;c=d"),
            ..request_with_url("http://example.com")
        });

        assert_eq!(request.cookies, cookies(&[("a", "b"), ("c", "d")]));
        assert_eq!(request.headers.value().unwrap().get_header("Cookie"), None);
    }

    #[test]
    fn test_cookies_in_header_dont_override_cookies() {
        let request = normalized(Request {
            headers: single_header("Cookie", "a=b;c=d"),
            cookies: cookies(&[("foo", "bar")]),
            ..request_with_url("http://example.com")
        });

        assert_eq!(request.cookies, cookies(&[("foo", "bar")]));

        // Cookie header is removed when explicit cookies are given
        assert_eq!(request.headers.value().unwrap().get_header("Cookie"), None);
    }

    #[test]
    fn test_method_invalid() {
        let request = normalized(request_with_method("!!!!"));
        assert_eq!(request.method.value(), None);
    }

    #[test]
    fn test_method_valid() {
        let request = normalized(request_with_method("POST"));
        assert_eq!(request.method.as_str(), Some("POST"));
    }

    #[test]
    fn test_infer_json() {
        let request = normalized(request_with_body(r#"{"foo":"bar"}"#));

        assert_eq!(
            request.inferred_content_type.as_str(),
            Some("application/json")
        );
        assert_eq!(request.data.value(), Some(&foo_bar_object()));
    }

    #[test]
    fn test_broken_json_with_fallback() {
        let request = normalized(Request {
            headers: single_header("Content-Type", "text/plain; encoding=utf-8"),
            ..request_with_body(r#"{"foo":"b"#)
        });

        assert_eq!(request.inferred_content_type.as_str(), Some("text/plain"));
        assert_eq!(request.data.as_str(), Some(r#"{"foo":"b"#));
    }

    #[test]
    fn test_broken_json_without_fallback() {
        assert_body_not_inferred(r#"{"foo":"b"#);
    }

    #[test]
    fn test_infer_url_encoded() {
        let request = normalized(request_with_body(r#"foo=bar"#));

        assert_eq!(
            request.inferred_content_type.as_str(),
            Some("application/x-www-form-urlencoded")
        );
        assert_eq!(request.data.value(), Some(&foo_bar_object()));
    }

    #[test]
    fn test_infer_url_false_positive() {
        assert_body_not_inferred("dGU=");
    }

    #[test]
    fn test_infer_url_encoded_base64() {
        assert_body_not_inferred("dA==");
    }

    #[test]
    fn test_infer_xml() {
        assert_body_not_inferred("<?xml version=\"1.0\" ?>");
    }

    #[test]
    fn test_infer_binary() {
        assert_body_not_inferred("\u{001f}1\u{0000}\u{0000}");
    }
}
556}