relay_event_normalization/eap/
mod.rs

1//! Event normalization and processing for attribute (EAP) based payloads.
2//!
3//! A central place for all modifications/normalizations for attributes.
4
5use chrono::{DateTime, Utc};
6use relay_common::time::UnixTimestamp;
7use relay_conventions::{
8    BROWSER_NAME, BROWSER_VERSION, OBSERVED_TIMESTAMP_NANOS, USER_GEO_CITY, USER_GEO_COUNTRY_CODE,
9    USER_GEO_REGION, USER_GEO_SUBDIVISION,
10};
11use relay_event_schema::protocol::{AttributeType, Attributes, BrowserContext, Geo};
12use relay_protocol::{Annotated, ErrorKind, Value};
13
14use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
15
16/// Normalizes/validates all attribute types.
17///
18/// Removes and marks all attributes with an error for which the specified [`AttributeType`]
19/// does not match the value.
20pub fn normalize_attribute_types(attributes: &mut Annotated<Attributes>) {
21    let Some(attributes) = attributes.value_mut() else {
22        return;
23    };
24
25    let attributes = attributes.iter_mut().map(|(_, attr)| attr);
26    for attribute in attributes {
27        use AttributeType::*;
28
29        let Some(inner) = attribute.value_mut() else {
30            continue;
31        };
32
33        match (&mut inner.value.ty, &mut inner.value.value) {
34            (Annotated(Some(Boolean), _), Annotated(Some(Value::Bool(_)), _)) => (),
35            (Annotated(Some(Integer), _), Annotated(Some(Value::I64(_)), _)) => (),
36            (Annotated(Some(Integer), _), Annotated(Some(Value::U64(_)), _)) => (),
37            (Annotated(Some(Double), _), Annotated(Some(Value::I64(_)), _)) => (),
38            (Annotated(Some(Double), _), Annotated(Some(Value::U64(_)), _)) => (),
39            (Annotated(Some(Double), _), Annotated(Some(Value::F64(_)), _)) => (),
40            (Annotated(Some(String), _), Annotated(Some(Value::String(_)), _)) => (),
41            // Note: currently the mapping to Kafka requires that invalid or unknown combinations
42            // of types and values are removed from the mapping.
43            //
44            // Usually Relay would only modify the offending values, but for now, until there
45            // is better support in the pipeline here, we need to remove the entire attribute.
46            (Annotated(Some(Unknown(_)), _), _) => {
47                let original = attribute.value_mut().take();
48                attribute.meta_mut().add_error(ErrorKind::InvalidData);
49                attribute.meta_mut().set_original_value(original);
50            }
51            (Annotated(Some(_), _), Annotated(Some(_), _)) => {
52                let original = attribute.value_mut().take();
53                attribute.meta_mut().add_error(ErrorKind::InvalidData);
54                attribute.meta_mut().set_original_value(original);
55            }
56            (Annotated(None, _), _) | (_, Annotated(None, _)) => {
57                let original = attribute.value_mut().take();
58                attribute.meta_mut().add_error(ErrorKind::MissingAttribute);
59                attribute.meta_mut().set_original_value(original);
60            }
61        }
62    }
63}
64
65/// Adds the `received` time to the attributes.
66pub fn normalize_received(attributes: &mut Annotated<Attributes>, received: DateTime<Utc>) {
67    attributes
68        .get_or_insert_with(Default::default)
69        .insert_if_missing(OBSERVED_TIMESTAMP_NANOS, || {
70            received
71                .timestamp_nanos_opt()
72                .unwrap_or_else(|| UnixTimestamp::now().as_nanos() as i64)
73                .to_string()
74        });
75}
76
77/// Normalizes the user agent/client information into [`Attributes`].
78///
79/// Does not modify the attributes if there is already browser information present,
80/// to preserve original values.
81pub fn normalize_user_agent(
82    attributes: &mut Annotated<Attributes>,
83    user_agent: Option<&str>,
84    client_hints: ClientHints<&str>,
85) {
86    let attributes = attributes.get_or_insert_with(Default::default);
87
88    if attributes.contains_key(BROWSER_NAME) || attributes.contains_key(BROWSER_VERSION) {
89        return;
90    }
91
92    let Some(context) = BrowserContext::from_hints_or_ua(&RawUserAgentInfo {
93        user_agent,
94        client_hints,
95    }) else {
96        return;
97    };
98
99    attributes.insert_if_missing(BROWSER_NAME, || context.name);
100    attributes.insert_if_missing(BROWSER_VERSION, || context.version);
101}
102
103/// Normalizes the user's geographical information into [`Attributes`].
104///
105/// Does not modify the attributes if there is already user geo information present,
106/// to preserve original values.
107pub fn normalize_user_geo(
108    attributes: &mut Annotated<Attributes>,
109    info: impl FnOnce() -> Option<Geo>,
110) {
111    let attributes = attributes.get_or_insert_with(Default::default);
112
113    if [
114        USER_GEO_COUNTRY_CODE,
115        USER_GEO_CITY,
116        USER_GEO_SUBDIVISION,
117        USER_GEO_REGION,
118    ]
119    .into_iter()
120    .any(|a| attributes.contains_key(a))
121    {
122        return;
123    }
124
125    let Some(geo) = info() else {
126        return;
127    };
128
129    attributes.insert_if_missing(USER_GEO_COUNTRY_CODE, || geo.country_code);
130    attributes.insert_if_missing(USER_GEO_CITY, || geo.city);
131    attributes.insert_if_missing(USER_GEO_SUBDIVISION, || geo.subdivision);
132    attributes.insert_if_missing(USER_GEO_REGION, || geo.region);
133}
134
#[cfg(test)]
mod tests {
    use relay_protocol::SerializableAnnotated;

    use super::*;

    /// Chrome user agent string shared by the user agent tests.
    const CHROME_USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

    /// Fixed `received` timestamp used by the `normalize_received` tests.
    fn received() -> DateTime<Utc> {
        DateTime::from_timestamp_nanos(1_234_201_337)
    }

    /// The received time is inserted when the attribute does not exist yet.
    #[test]
    fn test_normalize_received_none() {
        let mut attrs: Annotated<Attributes> = Default::default();

        normalize_received(&mut attrs, received());

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.observed_timestamp_nanos": {
            "type": "string",
            "value": "1234201337"
          }
        }
        "#);
    }

    /// An already present observed timestamp is not overwritten.
    #[test]
    fn test_normalize_received_existing() {
        let json = r#"{
          "sentry.observed_timestamp_nanos": {
            "type": "string",
            "value": "111222333"
          }
        }"#;
        let mut attrs = Annotated::<Attributes>::from_json(json).unwrap();

        normalize_received(&mut attrs, received());

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.observed_timestamp_nanos": {
            "type": "string",
            "value": "111222333"
          }
        }
        "#);
    }

    /// Valid type/value combinations are kept, everything else is removed with an error.
    #[test]
    fn test_process_attribute_types() {
        let json = r#"{
            "valid_bool": {
                "type": "boolean",
                "value": true
            },
            "valid_int_i64": {
                "type": "integer",
                "value": -42
            },
            "valid_int_u64": {
                "type": "integer",
                "value": 42
            },
            "valid_int_from_string": {
                "type": "integer",
                "value": "42"
            },
            "valid_double": {
                "type": "double",
                "value": 42.5
            },
            "double_with_i64": {
                "type": "double",
                "value": -42
            },
            "valid_double_with_u64": {
                "type": "double",
                "value": 42
            },
            "valid_string": {
                "type": "string",
                "value": "test"
            },
            "valid_string_with_other": {
                "type": "string",
                "value": "test",
                "some_other_field": "some_other_value"
            },
            "unknown_type": {
                "type": "custom",
                "value": "test"
            },
            "invalid_int_from_invalid_string": {
                "type": "integer",
                "value": "abc"
            },
            "missing_type": {
                "value": "value with missing type"
            },
            "missing_value": {
                "type": "string"
            }
        }"#;

        let mut attrs = Annotated::<Attributes>::from_json(json).unwrap();
        normalize_attribute_types(&mut attrs);

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r###"
        {
          "double_with_i64": {
            "type": "double",
            "value": -42
          },
          "invalid_int_from_invalid_string": null,
          "missing_type": null,
          "missing_value": null,
          "unknown_type": null,
          "valid_bool": {
            "type": "boolean",
            "value": true
          },
          "valid_double": {
            "type": "double",
            "value": 42.5
          },
          "valid_double_with_u64": {
            "type": "double",
            "value": 42
          },
          "valid_int_from_string": null,
          "valid_int_i64": {
            "type": "integer",
            "value": -42
          },
          "valid_int_u64": {
            "type": "integer",
            "value": 42
          },
          "valid_string": {
            "type": "string",
            "value": "test"
          },
          "valid_string_with_other": {
            "type": "string",
            "value": "test",
            "some_other_field": "some_other_value"
          },
          "_meta": {
            "invalid_int_from_invalid_string": {
              "": {
                "err": [
                  "invalid_data"
                ],
                "val": {
                  "type": "integer",
                  "value": "abc"
                }
              }
            },
            "missing_type": {
              "": {
                "err": [
                  "missing_attribute"
                ],
                "val": {
                  "type": null,
                  "value": "value with missing type"
                }
              }
            },
            "missing_value": {
              "": {
                "err": [
                  "missing_attribute"
                ],
                "val": {
                  "type": "string",
                  "value": null
                }
              }
            },
            "unknown_type": {
              "": {
                "err": [
                  "invalid_data"
                ],
                "val": {
                  "type": "custom",
                  "value": "test"
                }
              }
            },
            "valid_int_from_string": {
              "": {
                "err": [
                  "invalid_data"
                ],
                "val": {
                  "type": "integer",
                  "value": "42"
                }
              }
            }
          }
        }
        "###);
    }

    /// Browser name and version are derived from the user agent when absent.
    #[test]
    fn test_normalize_user_agent_none() {
        let mut attrs: Annotated<Attributes> = Default::default();

        normalize_user_agent(
            &mut attrs,
            Some(CHROME_USER_AGENT),
            ClientHints::default(),
        );

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.browser.name": {
            "type": "string",
            "value": "Chrome"
          },
          "sentry.browser.version": {
            "type": "string",
            "value": "131.0.0"
          }
        }
        "#);
    }

    /// Existing browser attributes win over the parsed user agent.
    #[test]
    fn test_normalize_user_agent_existing() {
        let json = r#"{
          "sentry.browser.name": {
            "type": "string",
            "value": "Very Special"
          },
          "sentry.browser.version": {
            "type": "string",
            "value": "13.3.7"
          }
        }"#;
        let mut attrs = Annotated::<Attributes>::from_json(json).unwrap();

        normalize_user_agent(
            &mut attrs,
            Some(CHROME_USER_AGENT),
            ClientHints::default(),
        );

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.browser.name": {
            "type": "string",
            "value": "Very Special"
          },
          "sentry.browser.version": {
            "type": "string",
            "value": "13.3.7"
          }
        }
        "#,
        );
    }

    /// Geo information is written when none exists; empty fields produce no attribute.
    #[test]
    fn test_normalize_user_geo_none() {
        let mut attrs: Annotated<Attributes> = Default::default();

        let geo = Geo {
            country_code: "XY".to_owned().into(),
            city: "Foo Hausen".to_owned().into(),
            subdivision: Annotated::empty(),
            region: "Illu".to_owned().into(),
            other: Default::default(),
        };
        normalize_user_geo(&mut attrs, || Some(geo));

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "user.geo.city": {
            "type": "string",
            "value": "Foo Hausen"
          },
          "user.geo.country_code": {
            "type": "string",
            "value": "XY"
          },
          "user.geo.region": {
            "type": "string",
            "value": "Illu"
          }
        }
        "#);
    }

    /// With any geo attribute present, the lookup closure must never be called.
    #[test]
    fn test_normalize_user_geo_existing() {
        let json = r#"{
          "user.geo.city": {
            "type": "string",
            "value": "Foo Hausen"
          }
        }"#;
        let mut attrs = Annotated::<Attributes>::from_json(json).unwrap();

        normalize_user_geo(&mut attrs, || unreachable!());

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "user.geo.city": {
            "type": "string",
            "value": "Foo Hausen"
          }
        }
        "#,
        );
    }
}