relay_event_normalization/
regexes.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4/// Contains multiple capture groups which will be used as a replace placeholder.
5///
6/// This regex is inspired by one used for grouping:
7/// <https://github.com/getsentry/sentry/blob/6ba59023a78bfe033e48ea4e035b64710a905c6b/src/sentry/grouping/strategies/message.py#L16-L97>
8pub static TRANSACTION_NAME_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
9    Regex::new(
10        r"(?x)
11    (?P<uuid>[^/\\]*
12        (?-u:\b)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}(?-u:\b)
13    [^/\\]*) |
14    (?P<sha1>[^/\\]*
15        (?-u:\b)[0-9a-fA-F]{40}(?-u:\b)
16    [^/\\]*) |
17    (?P<md5>[^/\\]*
18        (?-u:\b)[0-9a-fA-F]{32}(?-u:\b)
19    [^/\\]*) |
20    (?P<date>[^/\\]*
21        (?:
22            (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]\.[0-9]+([+-][0-2][0-9]:[0-5][0-9]|Z))|
23            (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))|
24            (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))
25        ) |
26        (?:
27            (?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(?-u:\s)+)?
28            (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+
29            (?:[0-9]{1,2})(?-u:\s)+
30            (?:[0-9]{2}:[0-9]{2}:[0-9]{2})(?-u:\s)+
31            [0-9]{4}
32        ) |
33        (?:
34            (?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),(?-u:\s)+)?
35            (?:0[1-9]|[1-2]?[0-9]|3[01])(?-u:\s)+
36            (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+
37            (?:19[0-9]{2}|[2-9][0-9]{3})(?-u:\s)+
38            (?:2[0-3]|[0-1][0-9]):([0-5][0-9])
39            (?::(60|[0-5][0-9]))?(?-u:\s)+
40            (?:[-\+][0-9]{2}[0-5][0-9]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z]))
41        )
42    [^/\\]*) |
43    (?P<hex>[^/\\]*
44        (?-u:\b)0[xX][0-9a-fA-F]+(?-u:\b)
45    [^/\\]*) |
46    (?:^|[/\\])
47    (?P<int>
48        (:?[^%/\\]|%[0-9a-fA-F]{2})*[0-9]{2,}
49    [^/\\]*)",
50    )
51    .unwrap()
52});
53
54/// Regex with multiple capture groups for resource tokens we should scrub.
55///
56/// Resource tokens are the tokens that exist in resource spans that generate
57/// high cardinality or are noise for the product. For example, the hash of the
58/// file next to its name.
59///
60/// Slightly modified Regex from
61/// <https://github.com/getsentry/sentry/blob/de5949a9a313d7ef0bf0685f84fe6e981ac38558/src/sentry/utils/performance_issues/base.py#L292-L306>
62pub static RESOURCE_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
63    Regex::new(
64        r"(?x)
65        # UUIDs.
66        (?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
67        # Version strings.
68        (?P<version>(v[0-9]+(?:\.[0-9]+)*)) |
69        # Hexadecimal strings with more than 5 digits.
70        (?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+) |
71        # Integer IDs with more than one digit.
72        (?P<int>[0-9][0-9]+)
73        ",
74    )
75    .unwrap()
76});
77
78pub static DB_SQL_TRANSACTION_CORE_DATA_REGEX: Lazy<Regex> =
79    Lazy::new(|| Regex::new(r"(?P<int>[0-9]+)").unwrap());
80
81pub static DB_SUPABASE_REGEX: Lazy<Regex> = Lazy::new(|| {
82    Regex::new(
83        r"(?x)
84        # UUIDs.
85        (?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
86        # Hexadecimal strings with more than 5 digits.
87        (?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+) |
88        # Integer IDs with more than one digit.
89        (?P<int>[0-9][0-9]+)
90        ",
91    )
92    .unwrap()
93});
94
95pub static FUNCTION_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
96    Regex::new(
97        r"(?x)
98        # UUIDs.
99        (?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
100        # Hexadecimal strings with more than 5 digits.
101        (?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+)
102        ",
103    )
104    .unwrap()
105});