relay_event_normalization/eap/
mod.rs

//! Event normalization and processing for attribute-based (EAP) payloads.
//!
//! This module is the central place for all modifications and normalizations of attributes.
4
5use std::net::IpAddr;
6
7use chrono::{DateTime, Utc};
8use relay_common::time::UnixTimestamp;
9use relay_conventions::{
10    AttributeInfo, BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS,
11    USER_AGENT_ORIGINAL, USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION,
12    USER_GEO_SUBDIVISION, WriteBehavior,
13};
14use relay_event_schema::protocol::{AttributeType, Attributes, BrowserContext, Geo};
15use relay_protocol::{Annotated, ErrorKind, Meta, Remark, RemarkType, Value};
16
17use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
18
19/// Normalizes/validates all attribute types.
20///
21/// Removes and marks all attributes with an error for which the specified [`AttributeType`]
22/// does not match the value.
23pub fn normalize_attribute_types(attributes: &mut Annotated<Attributes>) {
24    let Some(attributes) = attributes.value_mut() else {
25        return;
26    };
27
28    let attributes = attributes.0.values_mut();
29    for attribute in attributes {
30        use AttributeType::*;
31
32        let Some(inner) = attribute.value_mut() else {
33            continue;
34        };
35
36        match (&mut inner.value.ty, &mut inner.value.value) {
37            (Annotated(Some(Boolean), _), Annotated(Some(Value::Bool(_)), _)) => (),
38            (Annotated(Some(Integer), _), Annotated(Some(Value::I64(_)), _)) => (),
39            (Annotated(Some(Integer), _), Annotated(Some(Value::U64(_)), _)) => (),
40            (Annotated(Some(Double), _), Annotated(Some(Value::I64(_)), _)) => (),
41            (Annotated(Some(Double), _), Annotated(Some(Value::U64(_)), _)) => (),
42            (Annotated(Some(Double), _), Annotated(Some(Value::F64(_)), _)) => (),
43            (Annotated(Some(String), _), Annotated(Some(Value::String(_)), _)) => (),
44            // Note: currently the mapping to Kafka requires that invalid or unknown combinations
45            // of types and values are removed from the mapping.
46            //
47            // Usually Relay would only modify the offending values, but for now, until there
48            // is better support in the pipeline here, we need to remove the entire attribute.
49            (Annotated(Some(Unknown(_)), _), _) => {
50                let original = attribute.value_mut().take();
51                attribute.meta_mut().add_error(ErrorKind::InvalidData);
52                attribute.meta_mut().set_original_value(original);
53            }
54            (Annotated(Some(_), _), Annotated(Some(_), _)) => {
55                let original = attribute.value_mut().take();
56                attribute.meta_mut().add_error(ErrorKind::InvalidData);
57                attribute.meta_mut().set_original_value(original);
58            }
59            (Annotated(None, _), _) | (_, Annotated(None, _)) => {
60                let original = attribute.value_mut().take();
61                attribute.meta_mut().add_error(ErrorKind::MissingAttribute);
62                attribute.meta_mut().set_original_value(original);
63            }
64        }
65    }
66}
67
68/// Adds the `received` time to the attributes.
69pub fn normalize_received(attributes: &mut Annotated<Attributes>, received: DateTime<Utc>) {
70    attributes
71        .get_or_insert_with(Default::default)
72        .insert_if_missing(OBSERVED_TIMESTAMP_NANOS, || {
73            received
74                .timestamp_nanos_opt()
75                .unwrap_or_else(|| UnixTimestamp::now().as_nanos() as i64)
76                .to_string()
77        });
78}
79
80/// Normalizes the user agent/client information into [`Attributes`].
81///
82/// Does not modify the attributes if there is already browser information present,
83/// to preserve original values.
84pub fn normalize_user_agent(
85    attributes: &mut Annotated<Attributes>,
86    client_user_agent: Option<&str>,
87    client_hints: ClientHints<&str>,
88) {
89    let attributes = attributes.get_or_insert_with(Default::default);
90
91    if attributes.contains_key(BROWSER_NAME) || attributes.contains_key(BROWSER_VERSION) {
92        return;
93    }
94
95    // Prefer the stored/explicitly sent user agent over the user agent from the client/transport.
96    let user_agent = attributes
97        .get_value(USER_AGENT_ORIGINAL)
98        .and_then(|v| v.as_str())
99        .or(client_user_agent);
100
101    let Some(context) = BrowserContext::from_hints_or_ua(&RawUserAgentInfo {
102        user_agent,
103        client_hints,
104    }) else {
105        return;
106    };
107
108    attributes.insert_if_missing(BROWSER_NAME, || context.name);
109    attributes.insert_if_missing(BROWSER_VERSION, || context.version);
110}
111
112/// Normalizes the client address into [`Attributes`].
113///
114/// Infers the client ip from the client information which was provided to Relay, if the SDK
115/// indicates the client ip should be inferred by setting it to `{{auto}}`.
116///
117/// This requires cooperation from SDKs as inferring a client ip only works in non-server
118/// environments, where the user/client device is also the device sending the item.
119pub fn normalize_client_address(attributes: &mut Annotated<Attributes>, client_ip: Option<IpAddr>) {
120    let Some(attributes) = attributes.value_mut() else {
121        return;
122    };
123    let Some(client_ip) = client_ip else {
124        return;
125    };
126
127    let client_address = attributes
128        .get_value(CLIENT_ADDRESS)
129        .and_then(|v| v.as_str());
130
131    if client_address == Some("{{auto}}") {
132        attributes.insert(CLIENT_ADDRESS, client_ip.to_string());
133    }
134}
135
136/// Normalizes the user's geographical information into [`Attributes`].
137///
138/// Does not modify the attributes if there is already user geo information present,
139/// to preserve original values.
140pub fn normalize_user_geo(
141    attributes: &mut Annotated<Attributes>,
142    info: impl FnOnce() -> Option<Geo>,
143) {
144    let attributes = attributes.get_or_insert_with(Default::default);
145
146    if [
147        USER_GEO_COUNTRY_CODE,
148        USER_GEO_CITY,
149        USER_GEO_SUBDIVISION,
150        USER_GEO_REGION,
151    ]
152    .into_iter()
153    .any(|a| attributes.contains_key(a))
154    {
155        return;
156    }
157
158    let Some(geo) = info() else {
159        return;
160    };
161
162    attributes.insert_if_missing(USER_GEO_COUNTRY_CODE, || geo.country_code);
163    attributes.insert_if_missing(USER_GEO_CITY, || geo.city);
164    attributes.insert_if_missing(USER_GEO_SUBDIVISION, || geo.subdivision);
165    attributes.insert_if_missing(USER_GEO_REGION, || geo.region);
166}
167
168/// Normalizes deprecated attributes according to `sentry-conventions`.
169///
170/// Attributes with a status of `"normalize"` will be moved to their replacement name.
171/// If there is already a value present under the replacement name, it will be left alone,
172/// but the deprecated attribute is removed anyway.
173///
174/// Attributes with a status of `"backfill"` will be copied to their replacement name if the
175/// replacement name is not present. In any case, the original name is left alone.
176pub fn normalize_attribute_names(attributes: &mut Annotated<Attributes>) {
177    normalize_attribute_names_inner(attributes, relay_conventions::attribute_info)
178}
179
180fn normalize_attribute_names_inner(
181    attributes: &mut Annotated<Attributes>,
182    attribute_info: fn(&str) -> Option<&'static AttributeInfo>,
183) {
184    let Some(attributes) = attributes.value_mut() else {
185        return;
186    };
187
188    let attribute_names: Vec<_> = attributes.0.keys().cloned().collect();
189
190    for name in attribute_names {
191        let Some(attribute_info) = attribute_info(&name) else {
192            continue;
193        };
194
195        match attribute_info.write_behavior {
196            WriteBehavior::CurrentName => continue,
197            WriteBehavior::NewName(new_name) => {
198                let Some(old_attribute) = attributes.0.get_mut(&name) else {
199                    continue;
200                };
201
202                let mut meta = Meta::default();
203                // TODO: Possibly add a new RemarkType for "renamed/moved"
204                meta.add_remark(Remark::new(RemarkType::Removed, "attribute.deprecated"));
205                let new_attribute = std::mem::replace(old_attribute, Annotated(None, meta));
206
207                if !attributes.contains_key(new_name) {
208                    attributes.0.insert(new_name.to_owned(), new_attribute);
209                }
210            }
211            WriteBehavior::BothNames(new_name) => {
212                if !attributes.contains_key(new_name)
213                    && let Some(current_attribute) = attributes.0.get(&name).cloned()
214                {
215                    attributes.0.insert(new_name.to_owned(), current_attribute);
216                }
217            }
218        }
219    }
220}
221
#[cfg(test)]
mod tests {
    use relay_protocol::SerializableAnnotated;

    use super::*;

    /// User agent string shared by the user agent tests.
    const CHROME_USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

    /// Fixed receive time shared by the `normalize_received` tests.
    fn received() -> DateTime<Utc> {
        DateTime::from_timestamp_nanos(1_234_201_337)
    }

    /// The observed timestamp is added when no attributes exist yet.
    #[test]
    fn test_normalize_received_none() {
        let mut attrs = Default::default();

        normalize_received(&mut attrs, received());

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.observed_timestamp_nanos": {
            "type": "string",
            "value": "1234201337"
          }
        }
        "#);
    }

    /// An already present observed timestamp is not overwritten.
    #[test]
    fn test_normalize_received_existing() {
        let json = r#"{
          "sentry.observed_timestamp_nanos": {
            "type": "string",
            "value": "111222333"
          }
        }"#;
        let mut attrs = Annotated::from_json(json).unwrap();

        normalize_received(&mut attrs, received());

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r###"
        {
          "sentry.observed_timestamp_nanos": {
            "type": "string",
            "value": "111222333"
          }
        }
        "###);
    }

    /// Attributes with mismatching, unknown or incomplete type information are removed.
    #[test]
    fn test_process_attribute_types() {
        let payload = r#"{
            "valid_bool": {
                "type": "boolean",
                "value": true
            },
            "valid_int_i64": {
                "type": "integer",
                "value": -42
            },
            "valid_int_u64": {
                "type": "integer",
                "value": 42
            },
            "valid_int_from_string": {
                "type": "integer",
                "value": "42"
            },
            "valid_double": {
                "type": "double",
                "value": 42.5
            },
            "double_with_i64": {
                "type": "double",
                "value": -42
            },
            "valid_double_with_u64": {
                "type": "double",
                "value": 42
            },
            "valid_string": {
                "type": "string",
                "value": "test"
            },
            "valid_string_with_other": {
                "type": "string",
                "value": "test",
                "some_other_field": "some_other_value"
            },
            "unknown_type": {
                "type": "custom",
                "value": "test"
            },
            "invalid_int_from_invalid_string": {
                "type": "integer",
                "value": "abc"
            },
            "missing_type": {
                "value": "value with missing type"
            },
            "missing_value": {
                "type": "string"
            }
        }"#;

        let mut attrs: Annotated<Attributes> = Annotated::from_json(payload).unwrap();
        normalize_attribute_types(&mut attrs);

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r###"
        {
          "double_with_i64": {
            "type": "double",
            "value": -42
          },
          "invalid_int_from_invalid_string": null,
          "missing_type": null,
          "missing_value": null,
          "unknown_type": null,
          "valid_bool": {
            "type": "boolean",
            "value": true
          },
          "valid_double": {
            "type": "double",
            "value": 42.5
          },
          "valid_double_with_u64": {
            "type": "double",
            "value": 42
          },
          "valid_int_from_string": null,
          "valid_int_i64": {
            "type": "integer",
            "value": -42
          },
          "valid_int_u64": {
            "type": "integer",
            "value": 42
          },
          "valid_string": {
            "type": "string",
            "value": "test"
          },
          "valid_string_with_other": {
            "type": "string",
            "value": "test",
            "some_other_field": "some_other_value"
          },
          "_meta": {
            "invalid_int_from_invalid_string": {
              "": {
                "err": [
                  "invalid_data"
                ],
                "val": {
                  "type": "integer",
                  "value": "abc"
                }
              }
            },
            "missing_type": {
              "": {
                "err": [
                  "missing_attribute"
                ],
                "val": {
                  "type": null,
                  "value": "value with missing type"
                }
              }
            },
            "missing_value": {
              "": {
                "err": [
                  "missing_attribute"
                ],
                "val": {
                  "type": "string",
                  "value": null
                }
              }
            },
            "unknown_type": {
              "": {
                "err": [
                  "invalid_data"
                ],
                "val": {
                  "type": "custom",
                  "value": "test"
                }
              }
            },
            "valid_int_from_string": {
              "": {
                "err": [
                  "invalid_data"
                ],
                "val": {
                  "type": "integer",
                  "value": "42"
                }
              }
            }
          }
        }
        "###);
    }

    /// Browser name and version are derived from the client user agent.
    #[test]
    fn test_normalize_user_agent_none() {
        let mut attrs = Default::default();

        normalize_user_agent(
            &mut attrs,
            Some(CHROME_USER_AGENT),
            ClientHints::default(),
        );

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.browser.name": {
            "type": "string",
            "value": "Chrome"
          },
          "sentry.browser.version": {
            "type": "string",
            "value": "131.0.0"
          }
        }
        "#);
    }

    /// Browser information which is already present wins over the client user agent.
    #[test]
    fn test_normalize_user_agent_existing() {
        let json = r#"{
          "sentry.browser.name": {
            "type": "string",
            "value": "Very Special"
          },
          "sentry.browser.version": {
            "type": "string",
            "value": "13.3.7"
          }
        }"#;
        let mut attrs = Annotated::from_json(json).unwrap();

        normalize_user_agent(
            &mut attrs,
            Some(CHROME_USER_AGENT),
            ClientHints::default(),
        );

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "sentry.browser.name": {
            "type": "string",
            "value": "Very Special"
          },
          "sentry.browser.version": {
            "type": "string",
            "value": "13.3.7"
          }
        }
        "#);
    }

    /// Geo information is written when none is present, empty fields are skipped.
    #[test]
    fn test_normalize_user_geo_none() {
        let mut attrs = Default::default();

        let geo = Geo {
            country_code: String::from("XY").into(),
            city: String::from("Foo Hausen").into(),
            subdivision: Annotated::empty(),
            region: String::from("Illu").into(),
            other: Default::default(),
        };
        normalize_user_geo(&mut attrs, || Some(geo));

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "user.geo.city": {
            "type": "string",
            "value": "Foo Hausen"
          },
          "user.geo.country_code": {
            "type": "string",
            "value": "XY"
          },
          "user.geo.region": {
            "type": "string",
            "value": "Illu"
          }
        }
        "#);
    }

    /// Existing geo information is kept and the geo lookup is never invoked.
    #[test]
    fn test_normalize_user_geo_existing() {
        let json = r#"{
          "user.geo.city": {
            "type": "string",
            "value": "Foo Hausen"
          }
        }"#;
        let mut attrs = Annotated::from_json(json).unwrap();

        normalize_user_geo(&mut attrs, || unreachable!());

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r#"
        {
          "user.geo.city": {
            "type": "string",
            "value": "Foo Hausen"
          }
        }
        "#);
    }

    /// Deprecated attributes are moved or backfilled according to their write behavior.
    #[test]
    fn test_normalize_attributes() {
        /// Attribute definitions covering both write behaviors, each with and without an
        /// already existing replacement.
        fn mock_attribute_info(name: &str) -> Option<&'static AttributeInfo> {
            use relay_conventions::Pii;

            let info = match name {
                "replace.empty" => &AttributeInfo {
                    write_behavior: WriteBehavior::NewName("replaced"),
                    pii: Pii::Maybe,
                    aliases: &["replaced"],
                },
                "replace.existing" => &AttributeInfo {
                    write_behavior: WriteBehavior::NewName("not.replaced"),
                    pii: Pii::Maybe,
                    aliases: &["not.replaced"],
                },
                "backfill.empty" => &AttributeInfo {
                    write_behavior: WriteBehavior::BothNames("backfilled"),
                    pii: Pii::Maybe,
                    aliases: &["backfilled"],
                },
                "backfill.existing" => &AttributeInfo {
                    write_behavior: WriteBehavior::BothNames("not.backfilled"),
                    pii: Pii::Maybe,
                    aliases: &["not.backfilled"],
                },
                _ => return None,
            };

            Some(info)
        }

        let mut attrs = Annotated::new(Attributes::from([
            (
                String::from("replace.empty"),
                Annotated::new(String::from("Should be moved").into()),
            ),
            (
                String::from("replace.existing"),
                Annotated::new(String::from("Should be removed").into()),
            ),
            (
                String::from("not.replaced"),
                Annotated::new(String::from("Should be left alone").into()),
            ),
            (
                String::from("backfill.empty"),
                Annotated::new(String::from("Should be copied").into()),
            ),
            (
                String::from("backfill.existing"),
                Annotated::new(String::from("Should be left alone").into()),
            ),
            (
                String::from("not.backfilled"),
                Annotated::new(String::from("Should be left alone").into()),
            ),
        ]));

        normalize_attribute_names_inner(&mut attrs, mock_attribute_info);

        insta::assert_json_snapshot!(SerializableAnnotated(&attrs), @r###"
        {
          "backfill.empty": {
            "type": "string",
            "value": "Should be copied"
          },
          "backfill.existing": {
            "type": "string",
            "value": "Should be left alone"
          },
          "backfilled": {
            "type": "string",
            "value": "Should be copied"
          },
          "not.backfilled": {
            "type": "string",
            "value": "Should be left alone"
          },
          "not.replaced": {
            "type": "string",
            "value": "Should be left alone"
          },
          "replace.empty": null,
          "replace.existing": null,
          "replaced": {
            "type": "string",
            "value": "Should be moved"
          },
          "_meta": {
            "replace.empty": {
              "": {
                "rem": [
                  [
                    "attribute.deprecated",
                    "x"
                  ]
                ]
              }
            },
            "replace.existing": {
              "": {
                "rem": [
                  [
                    "attribute.deprecated",
                    "x"
                  ]
                ]
              }
            }
          }
        }
        "###);
    }
}
667}