relay_metrics/
protocol.rs

1use std::hash::Hasher as _;
2
3use hash32::{FnvHasher, Hasher as _};
4use relay_protocol::FiniteF64;
5
6#[doc(inline)]
7pub use relay_base_schema::metrics::{
8    CustomUnit, DurationUnit, FractionUnit, InformationUnit, MetricName, MetricNamespace,
9    MetricResourceIdentifier, MetricType, MetricUnit, ParseMetricError, ParseMetricUnitError,
10};
11#[doc(inline)]
12pub use relay_common::time::UnixTimestamp;
13#[doc(inline)]
14pub use unescaper::Error as UnescapeError;
15
16use crate::{Bucket, MetricTags};
17
/// Value type of a counter metric.
pub type CounterType = FiniteF64;

/// Type of a single entry in a distribution metric.
pub type DistributionType = FiniteF64;

/// Type of a single element in a set metric.
pub type SetType = u32;

/// Value type of the entries of a gauge metric.
pub type GaugeType = FiniteF64;
29
/// Error returned from [`normalize_bucket`].
///
/// Each variant describes why the metric name of a bucket was rejected during normalization.
#[derive(Debug, thiserror::Error)]
pub enum NormalizationError {
    /// The metric name includes an invalid or unsupported metric namespace.
    ///
    /// This is returned when the name parses, but its namespace is
    /// [`MetricNamespace::Unsupported`].
    #[error("unsupported metric namespace")]
    UnsupportedNamespace,
    /// The metric name cannot be parsed and is invalid.
    ///
    /// Carries a copy of the name that failed to parse.
    #[error("invalid metric name: {0:?}")]
    InvalidMetricName(MetricName),
}
40
41/// Normalizes a bucket.
42///
43/// The passed metric will have its name and tags normalized and tested for validity.
44/// Invalid characters in the metric name may be replaced,
45/// see [`relay_base_schema::metrics::try_normalize_metric_name`].
46///
47/// Invalid tags are removed and tag keys are normalized, for example control characters are
48/// removed from tag keys.
49pub fn normalize_bucket(bucket: &mut Bucket) -> Result<(), NormalizationError> {
50    normalize_metric_name(&mut bucket.name)?;
51    normalize_metric_tags(&mut bucket.tags);
52    Ok(())
53}
54
55/// Normalizes a metric name.
56///
57/// Normalization includes expanding valid metric names without a namespace to the default
58/// namespace.
59///
60/// Invalid metric names are rejected with [`NormalizationError`].
61fn normalize_metric_name(name: &mut MetricName) -> Result<(), NormalizationError> {
62    *name = match MetricResourceIdentifier::parse(name) {
63        Ok(mri) => {
64            if matches!(mri.namespace, MetricNamespace::Unsupported) {
65                return Err(NormalizationError::UnsupportedNamespace);
66            }
67
68            // We can improve this code part, by not always re-creating the name, if the name is
69            // already a valid MRI with namespace we can use the original name instead.
70            mri.to_string().into()
71        }
72        Err(_) => {
73            return Err(NormalizationError::InvalidMetricName(name.clone()));
74        }
75    };
76
77    Ok(())
78}
79
80/// Removes tags with invalid characters in the key, and validates tag values.
81///
82/// Tag values are validated with [`normalize_tag_value`].
83fn normalize_metric_tags(tags: &mut MetricTags) {
84    tags.retain(|tag_key, tag_value| {
85        if !is_valid_tag_key(tag_key) {
86            relay_log::debug!("invalid metric tag key {tag_key:?}");
87            return false;
88        }
89
90        normalize_tag_value(tag_value);
91
92        true
93    });
94}
95
/// Checks whether a tag key is valid.
///
/// A key is currently valid as long as it does not contain any ASCII control character. This
/// might change.
pub(crate) fn is_valid_tag_key(tag_key: &str) -> bool {
    // Scanning raw bytes is sufficient since only ASCII characters are of interest; bytes that
    // belong to multi-byte UTF-8 sequences are never ASCII.
    !tag_key.bytes().any(|byte| byte.is_ascii_control())
}
108
/// Escapes restricted characters in a raw tag value.
///
/// The following replacements are applied:
///  - Tab is escaped as `\t`.
///  - Line feed is escaped as `\n`.
///  - Carriage return is escaped as `\r`.
///  - Backslash is escaped as `\\`.
///  - Pipes and commas are given unicode escapes in the form `\u{7c}` and `\u{2c}`, respectively.
///
/// Every other control character is dropped, and all remaining characters are copied verbatim.
#[allow(unused)]
pub(crate) fn escape_tag_value(raw: &str) -> String {
    let mut output = String::with_capacity(raw.len());

    for ch in raw.chars() {
        let sequence = match ch {
            '\t' => Some(r"\t"),
            '\n' => Some(r"\n"),
            '\r' => Some(r"\r"),
            '\\' => Some(r"\\"),
            '|' => Some(r"\u{7c}"),
            ',' => Some(r"\u{2c}"),
            _ => None,
        };

        if let Some(sequence) = sequence {
            output.push_str(sequence);
        } else if !ch.is_control() {
            // Control characters without a dedicated escape sequence are skipped entirely.
            output.push(ch);
        }
    }

    output
}
136
137/// Decodes and normalizes a potentially escaped tag value into a raw string.
138///
139/// This replaces escape sequences following the rules of [`escape_tag_value`] with their original
140/// unicode characters. In addition to that, unicode escape sequences for all characters will be
141/// resolved.
142///
143/// Control characters are stripped from the resulting string. This is equivalent to
144/// [`normalize_tag_value`].
145pub(crate) fn unescape_tag_value(escaped: &str) -> Result<String, UnescapeError> {
146    let mut unescaped = unescaper::unescape(escaped)?;
147    normalize_tag_value(&mut unescaped);
148    Ok(unescaped)
149}
150
/// Normalizes a tag value in place.
///
/// A tag value is never rejected as a whole. Instead, invalid characters (control characters)
/// are removed from it and everything else is kept as is.
pub(crate) fn normalize_tag_value(tag_value: &mut String) {
    let is_allowed = |ch: char| !ch.is_control();
    tag_value.retain(is_allowed);
}
158
159/// Hashes the given set value.
160///
161/// Sets only guarantee 32-bit accuracy, but arbitrary strings are allowed on the protocol. Upon
162/// parsing, they are hashed and only used as hashes subsequently.
163pub(crate) fn hash_set_value(string: &str) -> u32 {
164    let mut hasher = FnvHasher::default();
165    hasher.write(string.as_bytes());
166    hasher.finish32()
167}
168
#[cfg(test)]
mod tests {
    use insta::assert_json_snapshot;

    use crate::BucketValue;

    use super::*;

    /// Unescaping resolves escape sequences and strips control characters from the result.
    #[test]
    fn test_unescape_tag_value() {
        // No escaping
        assert_eq!(unescape_tag_value("plain").unwrap(), "plain");
        assert_eq!(unescape_tag_value("plain text").unwrap(), "plain text");
        assert_eq!(unescape_tag_value("plain%text").unwrap(), "plain%text");

        // Escape sequences
        assert_eq!(
            unescape_tag_value("plain \\\\ text").unwrap(),
            "plain \\ text"
        );
        assert_eq!(
            unescape_tag_value("plain\\u{2c}text").unwrap(),
            "plain,text"
        );
        assert_eq!(
            unescape_tag_value("plain\\u{7c}text").unwrap(),
            "plain|text"
        );
        assert_eq!(unescape_tag_value("plain 😅").unwrap(), "plain 😅");

        // Alternate escape sequences
        assert_eq!(
            unescape_tag_value("plain \\u{5c} text").unwrap(),
            "plain \\ text"
        );

        // These are control characters and therefore stripped
        assert_eq!(unescape_tag_value("plain\\ntext").unwrap(), "plaintext");
        assert_eq!(unescape_tag_value("plain\\rtext").unwrap(), "plaintext");
        assert_eq!(unescape_tag_value("plain\\ttext").unwrap(), "plaintext");
        assert_eq!(unescape_tag_value("plain\u{7}text").unwrap(), "plaintext");
    }

    /// Escaping replaces restricted characters and drops control characters that have no escape
    /// sequence.
    #[test]
    fn test_escape_tag_value() {
        // No escaping
        assert_eq!(escape_tag_value("plain"), "plain");
        assert_eq!(escape_tag_value("plain text"), "plain text");
        assert_eq!(escape_tag_value("plain%text"), "plain%text");

        // Escape sequences
        assert_eq!(escape_tag_value("plain \\ text"), "plain \\\\ text");
        assert_eq!(escape_tag_value("plain,text"), "plain\\u{2c}text");
        assert_eq!(escape_tag_value("plain|text"), "plain\\u{7c}text");
        assert_eq!(escape_tag_value("plain 😅"), "plain 😅");

        // Escapable control characters (may be stripped by the parser)
        assert_eq!(escape_tag_value("plain\ntext"), "plain\\ntext");
        assert_eq!(escape_tag_value("plain\rtext"), "plain\\rtext");
        assert_eq!(escape_tag_value("plain\ttext"), "plain\\ttext");

        // Unescapable control characters
        assert_eq!(escape_tag_value("plain\u{07}text"), "plaintext");
        assert_eq!(escape_tag_value("plain\u{9c}text"), "plaintext");
    }

    /// A NUL byte in the namespace part of the name is reported as an invalid metric name.
    #[test]
    fn test_normalize_invalid_name() {
        let mut bucket = Bucket {
            timestamp: UnixTimestamp::from_secs(5000),
            width: 0,
            name: "c:transactions/\0hergus.bergus@none".into(),
            value: BucketValue::Counter(0.into()),
            tags: Default::default(),
            metadata: Default::default(),
        };

        assert!(matches!(
            normalize_bucket(&mut bucket),
            Err(NormalizationError::InvalidMetricName(_))
        ));
    }

    /// The namespace `lol` is reported as unsupported.
    #[test]
    fn test_normalize_invalid_namespace() {
        let mut bucket = Bucket {
            timestamp: UnixTimestamp::from_secs(5000),
            width: 0,
            name: "c:lol/hergus.bergus@none".into(),
            value: BucketValue::Counter(0.into()),
            tags: Default::default(),
            metadata: Default::default(),
        };

        assert!(matches!(
            normalize_bucket(&mut bucket),
            Err(NormalizationError::UnsupportedNamespace)
        ));
    }

    /// A name without namespace and unit is expanded, and invalid characters in it are replaced.
    #[test]
    fn test_normalize_name() {
        let mut bucket = Bucket {
            timestamp: UnixTimestamp::from_secs(5000),
            width: 0,
            name: "c:hergus\0\0bergus".into(),
            value: BucketValue::Counter(0.into()),
            tags: Default::default(),
            metadata: Default::default(),
        };

        normalize_bucket(&mut bucket).unwrap();

        assert_eq!(&bucket.name, "c:custom/hergus_bergus@none");
    }

    /// NUL bytes are stripped from tag values, while tags with NUL bytes in the key are dropped.
    #[test]
    fn test_normalize_tag_key_chars() {
        let mut bucket = Bucket {
            timestamp: UnixTimestamp::from_secs(5000),
            width: 0,
            name: "c:transactions/hergus.bergus".into(),
            value: BucketValue::Counter(0.into()),
            tags: {
                let mut tags = MetricTags::new();
                // There are some SDKs which mess up content encodings and interpret the raw bytes
                // of a UTF-16 string as UTF-8. This leads to ASCII strings getting NUL-bytes
                // interleaved.
                //
                // Somehow those values end up as release tag in sessions, while in error events we
                // haven't observed this malformed encoding. We believe it's slightly better to
                // strip out NUL-bytes instead of dropping the tag such that those values line up
                // again across sessions and events. Should that cause too high cardinality we'll
                // have to drop tags.
                //
                // Note that releases are validated separately against a much stricter character
                // set, but the above idea should still apply to other tags.
                tags.insert(
                    "is_it_garbage".to_owned(),
                    "a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(),
                );
                // The NUL byte is in the key here, so the entire tag is expected to be removed.
                tags.insert("another\0garbage".to_owned(), "bye".to_owned());
                tags
            },
            metadata: Default::default(),
        };

        normalize_bucket(&mut bucket).unwrap();

        assert_json_snapshot!(bucket, @r###"
        {
          "timestamp": 5000,
          "width": 0,
          "name": "c:transactions/hergus.bergus@none",
          "type": "c",
          "value": 0.0,
          "tags": {
            "is_it_garbage": "absolutely"
          }
        }
        "###);
    }
}
relay_metrics/protocol.rs

relay_metrics/
protocol.rs