relay_metrics/
protocol.rs

1use std::hash::Hasher as _;
2
3use hash32::{FnvHasher, Hasher as _};
4
5#[doc(inline)]
6pub use relay_base_schema::metrics::{
7    CustomUnit, DurationUnit, FractionUnit, InformationUnit, MetricName, MetricNamespace,
8    MetricResourceIdentifier, MetricType, MetricUnit, ParseMetricError, ParseMetricUnitError,
9};
10#[doc(inline)]
11pub use relay_common::time::UnixTimestamp;
12#[doc(inline)]
13pub use unescaper::Error as UnescapeError;
14
15use crate::{Bucket, FiniteF64, MetricTags};
16
17/// Type used for Counter metric
18pub type CounterType = FiniteF64;
19
20/// Type of distribution entries
21pub type DistributionType = FiniteF64;
22
23/// Type used for set elements in Set metric
24pub type SetType = u32;
25
26/// Type used for Gauge entries
27pub type GaugeType = FiniteF64;
28
29/// Error returned from [`normalize_bucket`].
30#[derive(Debug, thiserror::Error)]
31pub enum NormalizationError {
32    /// The metric name includes an invalid or unsupported metric namespace.
33    #[error("unsupported metric namespace")]
34    UnsupportedNamespace,
35    /// The metric name cannot be parsed and is invalid.
36    #[error("invalid metric name: {0:?}")]
37    InvalidMetricName(MetricName),
38}
39
40/// Normalizes a bucket.
41///
42/// The passed metric will have its name and tags normalized and tested for validity.
43/// Invalid characters in the metric name may be replaced,
44/// see [`relay_base_schema::metrics::try_normalize_metric_name`].
45///
46/// Invalid tags are removed and tag keys are normalized, for example control characters are
47/// removed from tag keys.
48pub fn normalize_bucket(bucket: &mut Bucket) -> Result<(), NormalizationError> {
49    normalize_metric_name(&mut bucket.name)?;
50    normalize_metric_tags(&mut bucket.tags);
51    Ok(())
52}
53
54/// Normalizes a metric name.
55///
56/// Normalization includes expanding valid metric names without a namespace to the default
57/// namespace.
58///
59/// Invalid metric names are rejected with [`NormalizationError`].
60fn normalize_metric_name(name: &mut MetricName) -> Result<(), NormalizationError> {
61    *name = match MetricResourceIdentifier::parse(name) {
62        Ok(mri) => {
63            if matches!(mri.namespace, MetricNamespace::Unsupported) {
64                return Err(NormalizationError::UnsupportedNamespace);
65            }
66
67            // We can improve this code part, by not always re-creating the name, if the name is
68            // already a valid MRI with namespace we can use the original name instead.
69            mri.to_string().into()
70        }
71        Err(_) => {
72            return Err(NormalizationError::InvalidMetricName(name.clone()));
73        }
74    };
75
76    Ok(())
77}
78
79/// Removes tags with invalid characters in the key, and validates tag values.
80///
81/// Tag values are validated with [`normalize_tag_value`].
82fn normalize_metric_tags(tags: &mut MetricTags) {
83    tags.retain(|tag_key, tag_value| {
84        if !is_valid_tag_key(tag_key) {
85            relay_log::debug!("invalid metric tag key {tag_key:?}");
86            return false;
87        }
88
89        normalize_tag_value(tag_value);
90
91        true
92    });
93}
94
/// Checks whether a tag key is acceptable.
///
/// At the moment the only requirement is that the key contains no ASCII control characters.
/// This might change.
pub(crate) fn is_valid_tag_key(tag_key: &str) -> bool {
    // Only ASCII characters are of interest, so the raw bytes are inspected directly; every
    // byte of a multi-byte UTF-8 sequence is >= 0x80 and therefore never an ASCII control.
    tag_key.bytes().all(|byte| !byte.is_ascii_control())
}
107
/// Escapes restricted characters in a raw tag value.
///
/// The following characters are replaced by escape sequences:
///  - Tab is escaped as `\t`.
///  - Carriage return is escaped as `\r`.
///  - Line feed is escaped as `\n`.
///  - Backslash is escaped as `\\`.
///  - Commas and pipes are given unicode escapes in the form `\u{2c}` and `\u{7c}`, respectively.
///
/// Any other control character is dropped. All remaining characters are copied unchanged.
#[allow(unused)]
pub(crate) fn escape_tag_value(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());

    for ch in raw.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\t' => "\\t",
            '\n' => "\\n",
            '\r' => "\\r",
            ',' => "\\u{2c}",
            '|' => "\\u{7c}",
            other => {
                // Control characters without a dedicated escape are stripped.
                if !other.is_control() {
                    out.push(other);
                }
                continue;
            }
        };

        out.push_str(replacement);
    }

    out
}
135
136/// Decodes and normalizes a potentially escaped tag value into a raw string.
137///
138/// This replaces escape sequences following the rules of [`escape_tag_value`] with their original
139/// unicode characters. In addition to that, unicode escape sequences for all characters will be
140/// resolved.
141///
142/// Control characters are stripped from the resulting string. This is equivalent to
143/// [`normalize_tag_value`].
144pub(crate) fn unescape_tag_value(escaped: &str) -> Result<String, UnescapeError> {
145    let mut unescaped = unescaper::unescape(escaped)?;
146    normalize_tag_value(&mut unescaped);
147    Ok(unescaped)
148}
149
/// Cleans up a tag value in place.
///
/// A tag value is never rejected as a whole. Instead, invalid characters (control characters)
/// are removed from it.
pub(crate) fn normalize_tag_value(tag_value: &mut String) {
    let is_allowed = |ch: char| !ch.is_control();
    tag_value.retain(is_allowed);
}
157
158/// Hashes the given set value.
159///
160/// Sets only guarantee 32-bit accuracy, but arbitrary strings are allowed on the protocol. Upon
161/// parsing, they are hashed and only used as hashes subsequently.
162pub(crate) fn hash_set_value(string: &str) -> u32 {
163    let mut hasher = FnvHasher::default();
164    hasher.write(string.as_bytes());
165    hasher.finish32()
166}
167
#[cfg(test)]
mod tests {
    use insta::assert_json_snapshot;

    use crate::BucketValue;

    use super::*;

    /// Builds a zero-valued counter bucket with the given metric name and tags.
    fn counter_bucket(name: &'static str, tags: MetricTags) -> Bucket {
        Bucket {
            timestamp: UnixTimestamp::from_secs(5000),
            width: 0,
            name: name.into(),
            value: BucketValue::Counter(0.into()),
            tags,
            metadata: Default::default(),
        }
    }

    #[test]
    fn test_unescape_tag_value() {
        let cases = [
            // No escaping
            ("plain", "plain"),
            ("plain text", "plain text"),
            ("plain%text", "plain%text"),
            // Escape sequences
            ("plain \\\\ text", "plain \\ text"),
            ("plain\\u{2c}text", "plain,text"),
            ("plain\\u{7c}text", "plain|text"),
            ("plain 😅", "plain 😅"),
            // Alternate escape sequences
            ("plain \\u{5c} text", "plain \\ text"),
            // These are control characters and therefore stripped
            ("plain\\ntext", "plaintext"),
            ("plain\\rtext", "plaintext"),
            ("plain\\ttext", "plaintext"),
            ("plain\u{7}text", "plaintext"),
        ];

        for (escaped, expected) in cases {
            assert_eq!(unescape_tag_value(escaped).unwrap(), expected);
        }
    }

    #[test]
    fn test_escape_tag_value() {
        let cases = [
            // No escaping
            ("plain", "plain"),
            ("plain text", "plain text"),
            ("plain%text", "plain%text"),
            // Escape sequences
            ("plain \\ text", "plain \\\\ text"),
            ("plain,text", "plain\\u{2c}text"),
            ("plain|text", "plain\\u{7c}text"),
            ("plain 😅", "plain 😅"),
            // Escapable control characters (may be stripped by the parser)
            ("plain\ntext", "plain\\ntext"),
            ("plain\rtext", "plain\\rtext"),
            ("plain\ttext", "plain\\ttext"),
            // Unescapable control characters
            ("plain\u{07}text", "plaintext"),
            ("plain\u{9c}text", "plaintext"),
        ];

        for (raw, expected) in cases {
            assert_eq!(escape_tag_value(raw), expected);
        }
    }

    #[test]
    fn test_normalize_invalid_name() {
        let mut bucket = counter_bucket("c:transactions/\0hergus.bergus@none", MetricTags::new());

        let result = normalize_bucket(&mut bucket);

        assert!(matches!(
            result,
            Err(NormalizationError::InvalidMetricName(_))
        ));
    }

    #[test]
    fn test_normalize_invalid_namespace() {
        let mut bucket = counter_bucket("c:lol/hergus.bergus@none", MetricTags::new());

        let result = normalize_bucket(&mut bucket);

        assert!(matches!(
            result,
            Err(NormalizationError::UnsupportedNamespace)
        ));
    }

    #[test]
    fn test_normalize_name() {
        let mut bucket = counter_bucket("c:hergus\0\0bergus", MetricTags::new());

        normalize_bucket(&mut bucket).unwrap();

        assert_eq!(&bucket.name, "c:custom/hergus_bergus@none");
    }

    #[test]
    fn test_normalize_tag_key_chars() {
        let mut tags = MetricTags::new();
        // Some SDKs mess up content encodings and interpret the raw bytes of a UTF-16 string as
        // UTF-8, which leaves ASCII strings with NUL bytes interleaved.
        //
        // Somehow those values end up as the release tag in sessions, while this malformed
        // encoding has not been observed in error events. We believe it is slightly better to
        // strip the NUL bytes than to drop the tag, so that those values line up again across
        // sessions and events. Should that cause too high cardinality, the tags will have to be
        // dropped.
        //
        // Note that releases are validated separately against a much stricter character set,
        // but the idea above should still apply to other tags.
        tags.insert(
            "is_it_garbage".to_owned(),
            "a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(),
        );
        tags.insert("another\0garbage".to_owned(), "bye".to_owned());

        let mut bucket = counter_bucket("c:transactions/hergus.bergus", tags);

        normalize_bucket(&mut bucket).unwrap();

        assert_json_snapshot!(bucket, @r###"
        {
          "timestamp": 5000,
          "width": 0,
          "name": "c:transactions/hergus.bergus@none",
          "type": "c",
          "value": 0.0,
          "tags": {
            "is_it_garbage": "absolutely"
          }
        }
        "###);
    }
}