relay_event_normalization/
regexes.rs

1use std::sync::LazyLock;
2
3use regex::Regex;
4
5/// Contains multiple capture groups which will be used as a replace placeholder.
6///
7/// This regex is inspired by one used for grouping:
8/// <https://github.com/getsentry/sentry/blob/6ba59023a78bfe033e48ea4e035b64710a905c6b/src/sentry/grouping/strategies/message.py#L16-L97>
9pub static TRANSACTION_NAME_NORMALIZER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
10    Regex::new(
11        r"(?x)
12    (?P<uuid>[^/\\]*
13        (?-u:\b)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}(?-u:\b)
14    [^/\\]*) |
15    (?P<sha1>[^/\\]*
16        (?-u:\b)[0-9a-fA-F]{40}(?-u:\b)
17    [^/\\]*) |
18    (?P<md5>[^/\\]*
19        (?-u:\b)[0-9a-fA-F]{32}(?-u:\b)
20    [^/\\]*) |
21    (?P<date>[^/\\]*
22        (?:
23            (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]\.[0-9]+([+-][0-2][0-9]:[0-5][0-9]|Z))|
24            (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))|
25            (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))
26        ) |
27        (?:
28            (?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(?-u:\s)+)?
29            (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+
30            (?:[0-9]{1,2})(?-u:\s)+
31            (?:[0-9]{2}:[0-9]{2}:[0-9]{2})(?-u:\s)+
32            [0-9]{4}
33        ) |
34        (?:
35            (?-u:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),(?-u:\s)+)?
36            (?:0[1-9]|[1-2]?[0-9]|3[01])(?-u:\s)+
37            (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?-u:\s)+
38            (?:19[0-9]{2}|[2-9][0-9]{3})(?-u:\s)+
39            (?:2[0-3]|[0-1][0-9]):([0-5][0-9])
40            (?::(60|[0-5][0-9]))?(?-u:\s)+
41            (?:[-\+][0-9]{2}[0-5][0-9]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z]))
42        )
43    [^/\\]*) |
44    (?P<hex>[^/\\]*
45        (?-u:\b)0[xX][0-9a-fA-F]+(?-u:\b)
46    [^/\\]*) |
47    (?:^|[/\\])
48    (?P<int>
49        (:?[^%/\\]|%[0-9a-fA-F]{2})*[0-9]{2,}
50    [^/\\]*)",
51    )
52    .unwrap()
53});
54
55/// Regex with multiple capture groups for resource tokens we should scrub.
56///
57/// Resource tokens are the tokens that exist in resource spans that generate
58/// high cardinality or are noise for the product. For example, the hash of the
59/// file next to its name.
60///
61/// Slightly modified Regex from
62/// <https://github.com/getsentry/sentry/blob/de5949a9a313d7ef0bf0685f84fe6e981ac38558/src/sentry/utils/performance_issues/base.py#L292-L306>
63pub static RESOURCE_NORMALIZER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
64    Regex::new(
65        r"(?x)
66        # UUIDs.
67        (?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
68        # Version strings.
69        (?P<version>(v[0-9]+(?:\.[0-9]+)*)) |
70        # Hexadecimal strings with more than 5 digits.
71        (?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+) |
72        # Integer IDs with more than one digit.
73        (?P<int>[0-9][0-9]+)
74        ",
75    )
76    .unwrap()
77});
78
79pub static DB_SQL_TRANSACTION_CORE_DATA_REGEX: LazyLock<Regex> =
80    LazyLock::new(|| Regex::new(r"(?P<int>[0-9]+)").unwrap());
81
82pub static DB_SUPABASE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
83    Regex::new(
84        r"(?x)
85        # UUIDs.
86        (?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
87        # Hexadecimal strings with more than 5 digits.
88        (?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+) |
89        # Integer IDs with more than one digit.
90        (?P<int>[0-9][0-9]+)
91        ",
92    )
93    .unwrap()
94});
95
96pub static FUNCTION_NORMALIZER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
97    Regex::new(
98        r"(?x)
99        # UUIDs.
100        (?P<uuid>[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}) |
101        # Hexadecimal strings with more than 5 digits.
102        (?P<hex>[a-fA-F0-9]{5}[a-fA-F0-9]+)
103        ",
104    )
105    .unwrap()
106});