relay_event_schema/processor/
attrs.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::ops::{Deref, RangeInclusive};
4
5use enumset::{EnumSet, EnumSetType};
6use relay_protocol::Annotated;
7
8use crate::processor::ProcessValue;
9
/// Error for unknown value types.
///
/// Passed as the error type to the `derive_fromstr_and_display!` invocation for [`ValueType`].
#[derive(Debug)]
pub struct UnknownValueTypeError;

impl fmt::Display for UnknownValueTypeError {
    /// Writes the fixed, human-readable error message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("unknown value type")
    }
}

impl std::error::Error for UnknownValueTypeError {}
21
/// The (simplified) type of a value.
///
/// Derives `EnumSetType`, so several value types can be combined in an [`EnumSet`], as done by
/// [`ValueType::for_field`] and [`ProcessingState::value_type`]. The string names of the
/// variants are declared in the `derive_fromstr_and_display!` invocation in this file.
///
/// NOTE(review): the derived `Ord` follows declaration order, and the `EnumSet` representation
/// presumably does too — confirm before reordering or inserting variants.
#[derive(Debug, Ord, PartialOrd, EnumSetType)]
pub enum ValueType {
    // Basic types
    String,
    Binary,
    Number,
    Boolean,
    DateTime,
    Array,
    Object,

    // Roots
    Event,
    Attachments,
    Replay,

    // Protocol types
    Exception,
    Stacktrace,
    Frame,
    Request,
    User,
    LogEntry,
    Message,
    Thread,
    Breadcrumb,
    OurLog,
    Span,
    ClientSdkInfo,

    // Attachments and Contents
    Minidump,
    HeapMemory,
    StackMemory,
}
58
59impl ValueType {
60    pub fn for_field<T: ProcessValue>(field: &Annotated<T>) -> EnumSet<Self> {
61        field
62            .value()
63            .map(ProcessValue::value_type)
64            .unwrap_or_else(EnumSet::empty)
65    }
66}
67
// Registers the string names of every `ValueType` variant via
// `derive_fromstr_and_display!`, with `UnknownValueTypeError` supplied as the error type.
// Some variants list several names separated by `|`.
// NOTE(review): presumably the first name is the one used for `Display` and the others are
// parse-only aliases — confirm against the macro definition in `relay_common`.
relay_common::derive_fromstr_and_display!(ValueType, UnknownValueTypeError, {
    ValueType::String => "string",
    ValueType::Binary => "binary",
    ValueType::Number => "number",
    ValueType::Boolean => "boolean" | "bool",
    ValueType::DateTime => "datetime",
    ValueType::Array => "array" | "list",
    ValueType::Object => "object",
    ValueType::Event => "event",
    ValueType::Attachments => "attachments",
    ValueType::Replay => "replay",
    ValueType::Exception => "error" | "exception",
    ValueType::Stacktrace => "stack" | "stacktrace",
    ValueType::Frame => "frame",
    ValueType::Request => "http" | "request",
    ValueType::User => "user",
    ValueType::LogEntry => "logentry",
    ValueType::Message => "message",
    ValueType::Thread => "thread",
    ValueType::Breadcrumb => "breadcrumb",
    ValueType::OurLog => "log",

    ValueType::Span => "span",
    ValueType::ClientSdkInfo => "sdk",
    ValueType::Minidump => "minidump",
    ValueType::HeapMemory => "heap_memory",
    ValueType::StackMemory => "stack_memory",
});
96
/// Whether an attribute should be PII-strippable, i.e. subject to data scrubbers.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Pii {
    /// The field will be stripped by default.
    True,
    /// The field cannot be stripped at all.
    False,
    /// The field will only be stripped when addressed with a specific path selector; generic
    /// selectors such as `$string` do not apply.
    Maybe,
}
108
/// A static or dynamic `Pii` value.
///
/// [`ProcessingState::pii`] resolves either variant to a concrete [`Pii`].
#[derive(Debug, Clone, Copy)]
pub enum PiiMode {
    /// A static value, used as is.
    Static(Pii),
    /// A dynamic value, computed by calling the function with a `ProcessingState`.
    Dynamic(fn(&ProcessingState) -> Pii),
}
117
/// Meta information about a field.
///
/// All constructors and setters on this type are `const fn`, so attributes can be assembled in
/// `static` or `const` items starting from [`FieldAttrs::new`].
#[derive(Debug, Clone, Copy)]
pub struct FieldAttrs {
    /// Optionally the name of the field.
    pub name: Option<&'static str>,
    /// If the field is required.
    pub required: bool,
    /// If the field should be non-empty.
    pub nonempty: bool,
    /// Whether to trim whitespace from this string.
    pub trim_whitespace: bool,
    /// A set of allowed or denied character ranges for this string.
    pub characters: Option<CharacterSet>,
    /// The maximum char length of this field.
    pub max_chars: Option<usize>,
    /// The extra char length allowance on top of `max_chars`.
    pub max_chars_allowance: usize,
    /// The maximum depth of this field.
    pub max_depth: Option<usize>,
    /// The maximum number of bytes of this field.
    pub max_bytes: Option<usize>,
    /// The type of PII on the field, either fixed or computed from a `ProcessingState`.
    pub pii: PiiMode,
    /// Whether additional properties should be retained during normalization.
    pub retain: bool,
    /// Whether the trimming processor is allowed to shorten or drop this field.
    ///
    /// [`FieldAttrs::new`] enables this by default.
    pub trim: bool,
}
146
/// A set of characters allowed or denied for a (string) field.
///
/// Note that this field is generated in the derive, it can't be constructed easily in tests.
#[derive(Clone, Copy)]
pub struct CharacterSet {
    /// Generated in derive for performance. Can be left out when set is created manually.
    pub char_is_valid: fn(char) -> bool,
    /// The ranges that are allowed/denied within the character set.
    pub ranges: &'static [RangeInclusive<char>],
    /// Whether the character set is inverted.
    pub is_negative: bool,
}

impl fmt::Debug for CharacterSet {
    /// Formats `ranges` and `is_negative`; the `char_is_valid` function pointer is left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("CharacterSet");
        out.field("ranges", &self.ranges);
        out.field("is_negative", &self.is_negative);
        out.finish()
    }
}
168
169impl FieldAttrs {
170    /// Creates default `FieldAttrs`.
171    pub const fn new() -> Self {
172        FieldAttrs {
173            name: None,
174            required: false,
175            nonempty: false,
176            trim_whitespace: false,
177            characters: None,
178            max_chars: None,
179            max_chars_allowance: 0,
180            max_depth: None,
181            max_bytes: None,
182            pii: PiiMode::Static(Pii::False),
183            retain: false,
184            trim: true,
185        }
186    }
187
188    /// Sets whether a value in this field is required.
189    pub const fn required(mut self, required: bool) -> Self {
190        self.required = required;
191        self
192    }
193
194    /// Sets whether this field can have an empty value.
195    ///
196    /// This is distinct from `required`. An empty string (`""`) passes the "required" check but not the
197    /// "nonempty" one.
198    pub const fn nonempty(mut self, nonempty: bool) -> Self {
199        self.nonempty = nonempty;
200        self
201    }
202
203    /// Sets whether whitespace should be trimmed before validation.
204    pub const fn trim_whitespace(mut self, trim_whitespace: bool) -> Self {
205        self.trim_whitespace = trim_whitespace;
206        self
207    }
208
209    /// Sets whether this field contains PII.
210    pub const fn pii(mut self, pii: Pii) -> Self {
211        self.pii = PiiMode::Static(pii);
212        self
213    }
214
215    /// Sets whether this field contains PII dynamically based on the current state.
216    pub const fn pii_dynamic(mut self, pii: fn(&ProcessingState) -> Pii) -> Self {
217        self.pii = PiiMode::Dynamic(pii);
218        self
219    }
220
221    /// Sets the maximum number of characters allowed in the field.
222    pub const fn max_chars(mut self, max_chars: usize) -> Self {
223        self.max_chars = Some(max_chars);
224        self
225    }
226
227    /// Sets whether additional properties should be retained during normalization.
228    pub const fn retain(mut self, retain: bool) -> Self {
229        self.retain = retain;
230        self
231    }
232}
233
/// Attributes returned by `ProcessingState::attrs` when a state carries no attributes.
static DEFAULT_FIELD_ATTRS: FieldAttrs = FieldAttrs::new();
/// Attributes handed out by `ProcessingState::inner_attrs` when the state's PII is `Pii::True`.
static PII_TRUE_FIELD_ATTRS: FieldAttrs = FieldAttrs::new().pii(Pii::True);
/// Attributes handed out by `ProcessingState::inner_attrs` when the state's PII is `Pii::Maybe`.
static PII_MAYBE_FIELD_ATTRS: FieldAttrs = FieldAttrs::new().pii(Pii::Maybe);
237
238impl Default for FieldAttrs {
239    fn default() -> Self {
240        Self::new()
241    }
242}
243
/// A single segment of a processing path: either a key or a numeric index.
///
/// Equality and ordering look only at the key text or index value, so a `StaticKey` and an
/// `OwnedKey` holding the same text are equal and compare as `Ordering::Equal`.
#[derive(Debug, Clone, Eq)]
enum PathItem<'a> {
    /// A borrowed key.
    StaticKey(&'a str),
    /// An owned key.
    OwnedKey(String),
    /// A numeric index.
    Index(usize),
}

impl<'a> PartialEq for PathItem<'a> {
    fn eq(&self, other: &PathItem<'a>) -> bool {
        self.key() == other.key() && self.index() == other.index()
    }
}

impl Ord for PathItem<'_> {
    /// Orders keys before indices, keys by their text and indices by their value.
    ///
    /// Implemented by hand because a derived ordering would distinguish `StaticKey` from
    /// `OwnedKey` and thereby disagree with the `PartialEq` implementation above.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Keys have no index (`None`), which sorts before every `Some(index)`.
        (self.index(), self.key()).cmp(&(other.index(), other.key()))
    }
}

impl PartialOrd for PathItem<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl PathItem<'_> {
    /// Returns the key if there is one
    #[inline]
    pub fn key(&self) -> Option<&str> {
        match self {
            PathItem::StaticKey(s) => Some(s),
            PathItem::OwnedKey(s) => Some(s.as_str()),
            PathItem::Index(_) => None,
        }
    }

    /// Returns the index if there is one
    #[inline]
    pub fn index(&self) -> Option<usize> {
        match self {
            PathItem::StaticKey(_) | PathItem::OwnedKey(_) => None,
            PathItem::Index(idx) => Some(*idx),
        }
    }
}

impl fmt::Display for PathItem<'_> {
    /// Writes the key text (honoring padding flags via `pad`) or the index value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PathItem::StaticKey(s) => f.pad(s),
            PathItem::OwnedKey(s) => f.pad(s.as_str()),
            PathItem::Index(val) => write!(f, "{val}"),
        }
    }
}
287
/// Like [`std::borrow::Cow`], but with a boxed value.
///
/// This is useful for types that contain themselves, where otherwise the layout of the type
/// cannot be computed, for example
///
/// ```rust,ignore
/// struct Foo<'a>(Cow<'a, Foo<'a>>); // will not compile
/// struct Bar<'a>(BoxCow<'a, Bar<'a>>); // will compile
/// ```
#[derive(Debug, Clone)]
enum BoxCow<'a, T> {
    Borrowed(&'a T),
    Owned(Box<T>),
}

impl<T> Deref for BoxCow<'_, T> {
    type Target = T;

    /// Gives shared access to the value, whether it is borrowed or boxed.
    fn deref(&self) -> &T {
        match *self {
            BoxCow::Borrowed(target) => target,
            BoxCow::Owned(ref boxed) => &**boxed,
        }
    }
}
313
/// An event's processing state.
///
/// The processing state describes an item in an event which is being processed, an example
/// of processing might be scrubbing the event for PII.  The processing state itself
/// describes the current item and its parent, which allows you to follow all the items up
/// to the root item.  You can think of processing an event as a visitor pattern visiting
/// all items in the event and the processing state is a stack describing the currently
/// visited item and all its parents.
#[derive(Debug, Clone)]
pub struct ProcessingState<'a> {
    // In event scrubbing, every state holds a reference to its parent.
    // In Replay scrubbing, we do not call `process_*` recursively,
    // but instead hold a single `ProcessingState` that represents the current item.
    // This item owns its parent (plus ancestors) exclusively, which is why we use `BoxCow` here
    // rather than `Rc` / `Arc`.
    parent: Option<BoxCow<'a, ProcessingState<'a>>>,
    // `None` for root states and for states created by `enter_nothing`.
    path_item: Option<PathItem<'a>>,
    // `None` makes `attrs()` fall back to the default field attributes.
    attrs: Option<Cow<'a, FieldAttrs>>,
    value_type: EnumSet<ValueType>,
    // Incremented by `enter_borrowed`, `enter_owned` and `enter_index`; `enter_nothing` keeps
    // the parent's depth.
    depth: usize,
}
335
/// The shared root state handed out by `ProcessingState::root`.
///
/// It has no parent, no path item, no attributes, an empty value type set and depth `0`.
static ROOT_STATE: ProcessingState = ProcessingState {
    parent: None,
    path_item: None,
    attrs: None,
    value_type: enumset::enum_set!(),
    depth: 0,
};
343
344impl<'a> ProcessingState<'a> {
345    /// Returns the root processing state.
346    pub fn root() -> &'static ProcessingState<'static> {
347        &ROOT_STATE
348    }
349
350    /// Creates a new root state.
351    pub fn new_root(
352        attrs: Option<Cow<'static, FieldAttrs>>,
353        value_type: impl IntoIterator<Item = ValueType>,
354    ) -> ProcessingState<'static> {
355        ProcessingState {
356            parent: None,
357            path_item: None,
358            attrs,
359            value_type: value_type.into_iter().collect(),
360            depth: 0,
361        }
362    }
363
364    /// Derives a processing state by entering a borrowed key.
365    pub fn enter_borrowed(
366        &'a self,
367        key: &'a str,
368        attrs: Option<Cow<'a, FieldAttrs>>,
369        value_type: impl IntoIterator<Item = ValueType>,
370    ) -> Self {
371        ProcessingState {
372            parent: Some(BoxCow::Borrowed(self)),
373            path_item: Some(PathItem::StaticKey(key)),
374            attrs,
375            value_type: value_type.into_iter().collect(),
376            depth: self.depth + 1,
377        }
378    }
379
380    /// Derives a processing state by entering an owned key.
381    ///
382    /// The new (child) state takes ownership of the current (parent) state.
383    pub fn enter_owned(
384        self,
385        key: String,
386        attrs: Option<Cow<'a, FieldAttrs>>,
387        value_type: impl IntoIterator<Item = ValueType>,
388    ) -> Self {
389        let depth = self.depth + 1;
390        ProcessingState {
391            parent: Some(BoxCow::Owned(self.into())),
392            path_item: Some(PathItem::OwnedKey(key)),
393            attrs,
394            value_type: value_type.into_iter().collect(),
395            depth,
396        }
397    }
398
399    /// Derives a processing state by entering an index.
400    pub fn enter_index(
401        &'a self,
402        idx: usize,
403        attrs: Option<Cow<'a, FieldAttrs>>,
404        value_type: impl IntoIterator<Item = ValueType>,
405    ) -> Self {
406        ProcessingState {
407            parent: Some(BoxCow::Borrowed(self)),
408            path_item: Some(PathItem::Index(idx)),
409            attrs,
410            value_type: value_type.into_iter().collect(),
411            depth: self.depth + 1,
412        }
413    }
414
415    /// Derives a processing state without adding a path segment. Useful in newtype structs.
416    pub fn enter_nothing(&'a self, attrs: Option<Cow<'a, FieldAttrs>>) -> Self {
417        ProcessingState {
418            attrs,
419            path_item: None,
420            parent: Some(BoxCow::Borrowed(self)),
421            ..self.clone()
422        }
423    }
424
425    /// Returns the path in the processing state.
426    pub fn path(&'a self) -> Path<'a> {
427        Path(self)
428    }
429
430    pub fn value_type(&self) -> EnumSet<ValueType> {
431        self.value_type
432    }
433
434    /// Returns the field attributes.
435    pub fn attrs(&self) -> &FieldAttrs {
436        match self.attrs {
437            Some(ref cow) => cow,
438            None => &DEFAULT_FIELD_ATTRS,
439        }
440    }
441
442    /// Derives the attrs for recursion.
443    pub fn inner_attrs(&self) -> Option<Cow<'_, FieldAttrs>> {
444        match self.pii() {
445            Pii::True => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
446            Pii::False => None,
447            Pii::Maybe => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
448        }
449    }
450
451    /// Returns the PII status for this state.
452    ///
453    /// If the state's `FieldAttrs` contain a fixed PII status,
454    /// it is returned. If they contain a dynamic PII status (a function),
455    /// it is applied to this state and the output returned.
456    pub fn pii(&self) -> Pii {
457        match self.attrs().pii {
458            PiiMode::Static(pii) => pii,
459            PiiMode::Dynamic(pii_fn) => pii_fn(self),
460        }
461    }
462
463    /// Iterates through this state and all its ancestors up the hierarchy.
464    ///
465    /// This starts at the top of the stack of processing states and ends at the root.  Thus
466    /// the first item returned is the currently visited leaf of the event structure.
467    pub fn iter(&'a self) -> ProcessingStateIter<'a> {
468        ProcessingStateIter {
469            state: Some(self),
470            size: self.depth,
471        }
472    }
473
474    /// Returns the contained parent state.
475    ///
476    /// - Returns `Ok(None)` if the current state is the root.
477    /// - Returns `Err(self)` if the current state does not own the parent state.
478    #[expect(
479        clippy::result_large_err,
480        reason = "this method returns `self` in the error case"
481    )]
482    pub fn try_into_parent(self) -> Result<Option<Self>, Self> {
483        match self.parent {
484            Some(BoxCow::Borrowed(_)) => Err(self),
485            Some(BoxCow::Owned(parent)) => Ok(Some(*parent)),
486            None => Ok(None),
487        }
488    }
489
490    /// Return the depth (~ indentation level) of the currently processed value.
491    pub fn depth(&'a self) -> usize {
492        self.depth
493    }
494
495    /// Return whether the depth changed between parent and self.
496    ///
497    /// This is `false` when we entered a newtype struct.
498    pub fn entered_anything(&'a self) -> bool {
499        if let Some(parent) = &self.parent {
500            parent.depth() != self.depth()
501        } else {
502            true
503        }
504    }
505
506    /// Returns the last path item if there is one. Skips over "dummy" path segments that exist
507    /// because of newtypes.
508    #[inline]
509    fn path_item(&self) -> Option<&PathItem<'_>> {
510        for state in self.iter() {
511            if let Some(ref path_item) = state.path_item {
512                return Some(path_item);
513            }
514        }
515        None
516    }
517}
518
519pub struct ProcessingStateIter<'a> {
520    state: Option<&'a ProcessingState<'a>>,
521    size: usize,
522}
523
524impl<'a> Iterator for ProcessingStateIter<'a> {
525    type Item = &'a ProcessingState<'a>;
526
527    fn next(&mut self) -> Option<Self::Item> {
528        let current = self.state?;
529        self.state = current.parent.as_deref();
530        Some(current)
531    }
532
533    fn size_hint(&self) -> (usize, Option<usize>) {
534        (self.size, Some(self.size))
535    }
536}
537
538impl ExactSizeIterator for ProcessingStateIter<'_> {}
539
540impl Default for ProcessingState<'_> {
541    fn default() -> Self {
542        ProcessingState::root().clone()
543    }
544}
545
/// Represents the [`ProcessingState`] as a path.
///
/// This is a view of a [`ProcessingState`] which treats the stack of states as a path.
/// Its `Display` implementation writes the path items from the outermost state to the current
/// one, separated by `.`.
#[derive(Debug)]
pub struct Path<'a>(&'a ProcessingState<'a>);
551
552impl Path<'_> {
553    /// Returns the current key if there is one
554    #[inline]
555    pub fn key(&self) -> Option<&str> {
556        PathItem::key(self.0.path_item()?)
557    }
558
559    /// Returns the current index if there is one
560    #[inline]
561    pub fn index(&self) -> Option<usize> {
562        PathItem::index(self.0.path_item()?)
563    }
564
565    /// Return the depth (~ indentation level) of the currently processed value.
566    pub fn depth(&self) -> usize {
567        self.0.depth()
568    }
569
570    /// Returns the field attributes of the current path item.
571    pub fn attrs(&self) -> &FieldAttrs {
572        self.0.attrs()
573    }
574
575    /// Returns the PII status for this path.
576    pub fn pii(&self) -> Pii {
577        self.0.pii()
578    }
579
580    /// Iterates through the states in this path.
581    pub fn iter(&self) -> ProcessingStateIter<'_> {
582        self.0.iter()
583    }
584}
585
586impl fmt::Display for Path<'_> {
587    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
588        let mut items = Vec::with_capacity(self.0.depth);
589        for state in self.0.iter() {
590            if let Some(ref path_item) = state.path_item {
591                items.push(path_item)
592            }
593        }
594
595        for (idx, item) in items.into_iter().rev().enumerate() {
596            if idx > 0 {
597                write!(f, ".")?;
598            }
599            write!(f, "{item}")?;
600        }
601        Ok(())
602    }
603}
604
#[cfg(test)]
mod tests {

    use relay_protocol::{Annotated, Empty, FromValue, IntoValue, Object, SerializableAnnotated};

    use crate::processor::{Pii, ProcessValue, ProcessingState, Processor, process_value};

    /// Dynamic PII callback that derives the status from the key of the current path item.
    fn pii_from_item_name(state: &ProcessingState) -> Pii {
        let key = state.path_item().and_then(|item| item.key());
        match key {
            Some("true_item") => Pii::True,
            Some("false_item") => Pii::False,
            _ => Pii::Maybe,
        }
    }

    #[derive(Debug, Clone, Empty, IntoValue, FromValue, ProcessValue)]
    #[metastructure(pii = "pii_from_item_name")]
    struct TestValue(String);

    /// Replaces every string with the name of the PII status resolved for its state.
    struct TestProcessor;

    impl Processor for TestProcessor {
        fn process_string(
            &mut self,
            value: &mut String,
            _meta: &mut relay_protocol::Meta,
            state: &ProcessingState<'_>,
        ) -> crate::processor::ProcessingResult {
            let replacement = match state.pii() {
                Pii::True => "true",
                Pii::False => "false",
                Pii::Maybe => "maybe",
            };
            *value = replacement.to_owned();
            Ok(())
        }
    }

    #[test]
    fn test_dynamic_pii() {
        let mut object: Annotated<Object<TestValue>> = Annotated::from_json(
            r#"
        {
          "false_item": "replace me",
          "other_item": "replace me",
          "true_item": "replace me"
        }
        "#,
        )
        .unwrap();

        let mut processor = TestProcessor;
        process_value(&mut object, &mut processor, ProcessingState::root()).unwrap();

        insta::assert_json_snapshot!(SerializableAnnotated(&object), @r###"
        {
          "false_item": "false",
          "other_item": "maybe",
          "true_item": "true"
        }
        "###);
    }
}