relay_event_schema/processor/
attrs.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::ops::{Deref, RangeInclusive};
4
5use enumset::{EnumSet, EnumSetType};
6use relay_protocol::Annotated;
7
8use crate::processor::ProcessValue;
9
/// Error indicating an unknown value type.
///
/// This type is handed to `derive_fromstr_and_display!` as the error type for [`ValueType`].
#[derive(Debug)]
pub struct UnknownValueTypeError;

impl fmt::Display for UnknownValueTypeError {
    /// Writes the fixed message `unknown value type`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("unknown value type")
    }
}

impl std::error::Error for UnknownValueTypeError {}
21
22/// The (simplified) type of a value.
23#[derive(Debug, Ord, PartialOrd, EnumSetType)]
24pub enum ValueType {
25    // Basic types
26    String,
27    Binary,
28    Number,
29    Boolean,
30    DateTime,
31    Array,
32    Object,
33
34    // Roots
35    Event,
36    Attachments,
37    Replay,
38
39    // Protocol types
40    Exception,
41    Stacktrace,
42    Frame,
43    Request,
44    User,
45    LogEntry,
46    Message,
47    Thread,
48    Breadcrumb,
49    OurLog,
50    TraceMetric,
51    Span,
52    ClientSdkInfo,
53
54    // Attachments and Contents
55    Minidump,
56    HeapMemory,
57    StackMemory,
58}
59
60impl ValueType {
61    pub fn for_field<T: ProcessValue>(field: &Annotated<T>) -> EnumSet<Self> {
62        field
63            .value()
64            .map(ProcessValue::value_type)
65            .unwrap_or_else(EnumSet::empty)
66    }
67}
68
69relay_common::derive_fromstr_and_display!(ValueType, UnknownValueTypeError, {
70    ValueType::String => "string",
71    ValueType::Binary => "binary",
72    ValueType::Number => "number",
73    ValueType::Boolean => "boolean" | "bool",
74    ValueType::DateTime => "datetime",
75    ValueType::Array => "array" | "list",
76    ValueType::Object => "object",
77    ValueType::Event => "event",
78    ValueType::Attachments => "attachments",
79    ValueType::Replay => "replay",
80    ValueType::Exception => "error" | "exception",
81    ValueType::Stacktrace => "stack" | "stacktrace",
82    ValueType::Frame => "frame",
83    ValueType::Request => "http" | "request",
84    ValueType::User => "user",
85    ValueType::LogEntry => "logentry",
86    ValueType::Message => "message",
87    ValueType::Thread => "thread",
88    ValueType::Breadcrumb => "breadcrumb",
89    ValueType::OurLog => "log",
90    ValueType::TraceMetric => "trace_metric",
91
92    ValueType::Span => "span",
93    ValueType::ClientSdkInfo => "sdk",
94    ValueType::Minidump => "minidump",
95    ValueType::HeapMemory => "heap_memory",
96    ValueType::StackMemory => "stack_memory",
97});
98
/// Whether an attribute is PII-strippable, i.e. whether it is subject to data scrubbers.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Pii {
    /// The field is stripped by default.
    True,
    /// The field can never be stripped.
    False,
    /// The field is stripped only when addressed with a specific path selector; generic
    /// selectors such as `$string` do not apply.
    Maybe,
}
110
111/// A static or dynamic `Pii` value.
112#[derive(Debug, Clone, Copy)]
113pub enum PiiMode {
114    /// A static value.
115    Static(Pii),
116    /// A dynamic value, computed based on a `ProcessingState`.
117    Dynamic(fn(&ProcessingState) -> Pii),
118}
119
120/// Meta information about a field.
121#[derive(Debug, Clone, Copy)]
122pub struct FieldAttrs {
123    /// Optionally the name of the field.
124    pub name: Option<&'static str>,
125    /// If the field is required.
126    pub required: bool,
127    /// If the field should be non-empty.
128    pub nonempty: bool,
129    /// Whether to trim whitespace from this string.
130    pub trim_whitespace: bool,
131    /// A set of allowed or denied character ranges for this string.
132    pub characters: Option<CharacterSet>,
133    /// The maximum char length of this field.
134    pub max_chars: Option<usize>,
135    /// The extra char length allowance on top of max_chars.
136    pub max_chars_allowance: usize,
137    /// The maximum depth of this field.
138    pub max_depth: Option<usize>,
139    /// The maximum number of bytes of this field.
140    pub max_bytes: Option<usize>,
141    /// The type of PII on the field.
142    pub pii: PiiMode,
143    /// Whether additional properties should be retained during normalization.
144    pub retain: bool,
145    /// Whether the trimming processor is allowed to shorten or drop this field.
146    pub trim: bool,
147}
148
/// A set of characters allowed or denied for a (string) field.
///
/// Values of this type are generated by the derive; constructing one by hand (for example
/// in tests) is not convenient.
#[derive(Copy, Clone)]
pub struct CharacterSet {
    /// Validity check generated in the derive for performance. Can be left out when the set
    /// is created manually.
    pub char_is_valid: fn(char) -> bool,
    /// The ranges that are allowed/denied within the character set.
    pub ranges: &'static [RangeInclusive<char>],
    /// Whether the character set is inverted.
    pub is_negative: bool,
}

impl fmt::Debug for CharacterSet {
    /// Formats `ranges` and `is_negative`; the `char_is_valid` function pointer is left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("CharacterSet");
        out.field("ranges", &self.ranges);
        out.field("is_negative", &self.is_negative);
        out.finish()
    }
}
170
171impl FieldAttrs {
172    /// Creates default `FieldAttrs`.
173    pub const fn new() -> Self {
174        FieldAttrs {
175            name: None,
176            required: false,
177            nonempty: false,
178            trim_whitespace: false,
179            characters: None,
180            max_chars: None,
181            max_chars_allowance: 0,
182            max_depth: None,
183            max_bytes: None,
184            pii: PiiMode::Static(Pii::False),
185            retain: false,
186            trim: true,
187        }
188    }
189
190    /// Sets whether a value in this field is required.
191    pub const fn required(mut self, required: bool) -> Self {
192        self.required = required;
193        self
194    }
195
196    /// Sets whether this field can have an empty value.
197    ///
198    /// This is distinct from `required`. An empty string (`""`) passes the "required" check but not the
199    /// "nonempty" one.
200    pub const fn nonempty(mut self, nonempty: bool) -> Self {
201        self.nonempty = nonempty;
202        self
203    }
204
205    /// Sets whether whitespace should be trimmed before validation.
206    pub const fn trim_whitespace(mut self, trim_whitespace: bool) -> Self {
207        self.trim_whitespace = trim_whitespace;
208        self
209    }
210
211    /// Sets whether this field contains PII.
212    pub const fn pii(mut self, pii: Pii) -> Self {
213        self.pii = PiiMode::Static(pii);
214        self
215    }
216
217    /// Sets whether this field contains PII dynamically based on the current state.
218    pub const fn pii_dynamic(mut self, pii: fn(&ProcessingState) -> Pii) -> Self {
219        self.pii = PiiMode::Dynamic(pii);
220        self
221    }
222
223    /// Sets the maximum number of characters allowed in the field.
224    pub const fn max_chars(mut self, max_chars: usize) -> Self {
225        self.max_chars = Some(max_chars);
226        self
227    }
228
229    /// Sets whether additional properties should be retained during normalization.
230    pub const fn retain(mut self, retain: bool) -> Self {
231        self.retain = retain;
232        self
233    }
234}
235
236static DEFAULT_FIELD_ATTRS: FieldAttrs = FieldAttrs::new();
237static PII_TRUE_FIELD_ATTRS: FieldAttrs = FieldAttrs::new().pii(Pii::True);
238static PII_MAYBE_FIELD_ATTRS: FieldAttrs = FieldAttrs::new().pii(Pii::Maybe);
239
240impl Default for FieldAttrs {
241    fn default() -> Self {
242        Self::new()
243    }
244}
245
/// A single segment of a processing path: an object key or an array index.
///
/// `StaticKey` and `OwnedKey` differ only in how the key text is stored. Equality and
/// ordering therefore look at the key text (or the index) alone, never at the storage
/// variant, which keeps `PartialEq`, `Eq`, `PartialOrd` and `Ord` consistent with each other.
#[derive(Debug, Clone)]
enum PathItem<'a> {
    /// A key borrowed for `'a`.
    StaticKey(&'a str),
    /// A key owned by the path item.
    OwnedKey(String),
    /// A positional index.
    Index(usize),
}

impl<'a> PartialEq for PathItem<'a> {
    /// Two items are equal when both their keys and their indexes are equal.
    fn eq(&self, other: &PathItem<'a>) -> bool {
        self.ordering_key() == other.ordering_key()
    }
}

impl Eq for PathItem<'_> {}

impl PartialOrd for PathItem<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for PathItem<'_> {
    /// Orders keys before indexes; keys compare by text, indexes by value.
    ///
    /// A derived ordering would compare the variant first and so rank `StaticKey("a")`
    /// below `OwnedKey("a")` although the two are equal; ordering on the same data as `eq`
    /// avoids that contradiction.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.ordering_key().cmp(&other.ordering_key())
    }
}

impl PathItem<'_> {
    /// Returns the key if there is one.
    #[inline]
    pub fn key(&self) -> Option<&str> {
        match self {
            PathItem::StaticKey(s) => Some(s),
            PathItem::OwnedKey(s) => Some(s.as_str()),
            PathItem::Index(_) => None,
        }
    }

    /// Returns the index if there is one.
    #[inline]
    pub fn index(&self) -> Option<usize> {
        match self {
            PathItem::StaticKey(_) | PathItem::OwnedKey(_) => None,
            PathItem::Index(idx) => Some(*idx),
        }
    }

    /// The data that equality and ordering are based on.
    ///
    /// The index comes first: it is `None` for keys and `None` sorts before `Some`, so every
    /// key sorts before every index.
    fn ordering_key(&self) -> (Option<usize>, Option<&str>) {
        (self.index(), self.key())
    }
}

impl fmt::Display for PathItem<'_> {
    /// Writes the key (honoring padding options of the formatter) or the index.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PathItem::StaticKey(s) => f.pad(s),
            PathItem::OwnedKey(s) => f.pad(s.as_str()),
            PathItem::Index(val) => write!(f, "{val}"),
        }
    }
}
289
/// Like [`std::borrow::Cow`], but the owned variant stores its value in a `Box`.
///
/// This helps with types that contain themselves, where the layout of the type could
/// otherwise not be computed, for example
///
/// ```rust,ignore
/// struct Foo<'a>(Cow<'a, Foo<'a>>); // will not compile
/// struct Bar<'a>(BoxCow<'a, Bar<'a>>); // will compile
/// ```
#[derive(Clone, Debug)]
enum BoxCow<'a, T> {
    /// A reference to a value stored elsewhere.
    Borrowed(&'a T),
    /// A boxed value owned by this instance.
    Owned(Box<T>),
}

impl<T> Deref for BoxCow<'_, T> {
    type Target = T;

    /// Gives shared access to the wrapped value of either variant.
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Borrowed(target) => *target,
            Self::Owned(boxed) => &**boxed,
        }
    }
}
315
316/// An event's processing state.
317///
318/// The processing state describes an item in an event which is being processed, an example
319/// of processing might be scrubbing the event for PII.  The processing state itself
320/// describes the current item and it's parent, which allows you to follow all the items up
321/// to the root item.  You can think of processing an event as a visitor pattern visiting
322/// all items in the event and the processing state is a stack describing the currently
323/// visited item and all it's parents.
324#[derive(Debug, Clone)]
325pub struct ProcessingState<'a> {
326    // In event scrubbing, every state holds a reference to its parent.
327    // In Replay scrubbing, we do not call `process_*` recursively,
328    // but instead hold a single `ProcessingState` that represents the current item.
329    // This item owns its parent (plus ancestors) exclusively, which is why we use `BoxCow` here
330    // rather than `Rc` / `Arc`.
331    parent: Option<BoxCow<'a, ProcessingState<'a>>>,
332    path_item: Option<PathItem<'a>>,
333    attrs: Option<Cow<'a, FieldAttrs>>,
334    value_type: EnumSet<ValueType>,
335    depth: usize,
336}
337
338static ROOT_STATE: ProcessingState = ProcessingState {
339    parent: None,
340    path_item: None,
341    attrs: None,
342    value_type: enumset::enum_set!(),
343    depth: 0,
344};
345
346impl<'a> ProcessingState<'a> {
347    /// Returns the root processing state.
348    pub fn root() -> &'static ProcessingState<'static> {
349        &ROOT_STATE
350    }
351
352    /// Creates a new root state.
353    pub fn new_root(
354        attrs: Option<Cow<'static, FieldAttrs>>,
355        value_type: impl IntoIterator<Item = ValueType>,
356    ) -> ProcessingState<'static> {
357        ProcessingState {
358            parent: None,
359            path_item: None,
360            attrs,
361            value_type: value_type.into_iter().collect(),
362            depth: 0,
363        }
364    }
365
366    /// Derives a processing state by entering a borrowed key.
367    pub fn enter_borrowed(
368        &'a self,
369        key: &'a str,
370        attrs: Option<Cow<'a, FieldAttrs>>,
371        value_type: impl IntoIterator<Item = ValueType>,
372    ) -> Self {
373        ProcessingState {
374            parent: Some(BoxCow::Borrowed(self)),
375            path_item: Some(PathItem::StaticKey(key)),
376            attrs,
377            value_type: value_type.into_iter().collect(),
378            depth: self.depth + 1,
379        }
380    }
381
382    /// Derives a processing state by entering an owned key.
383    ///
384    /// The new (child) state takes ownership of the current (parent) state.
385    pub fn enter_owned(
386        self,
387        key: String,
388        attrs: Option<Cow<'a, FieldAttrs>>,
389        value_type: impl IntoIterator<Item = ValueType>,
390    ) -> Self {
391        let depth = self.depth + 1;
392        ProcessingState {
393            parent: Some(BoxCow::Owned(self.into())),
394            path_item: Some(PathItem::OwnedKey(key)),
395            attrs,
396            value_type: value_type.into_iter().collect(),
397            depth,
398        }
399    }
400
401    /// Derives a processing state by entering an index.
402    pub fn enter_index(
403        &'a self,
404        idx: usize,
405        attrs: Option<Cow<'a, FieldAttrs>>,
406        value_type: impl IntoIterator<Item = ValueType>,
407    ) -> Self {
408        ProcessingState {
409            parent: Some(BoxCow::Borrowed(self)),
410            path_item: Some(PathItem::Index(idx)),
411            attrs,
412            value_type: value_type.into_iter().collect(),
413            depth: self.depth + 1,
414        }
415    }
416
417    /// Derives a processing state without adding a path segment. Useful in newtype structs.
418    pub fn enter_nothing(&'a self, attrs: Option<Cow<'a, FieldAttrs>>) -> Self {
419        ProcessingState {
420            attrs,
421            path_item: None,
422            parent: Some(BoxCow::Borrowed(self)),
423            ..self.clone()
424        }
425    }
426
427    /// Returns the path in the processing state.
428    pub fn path(&'a self) -> Path<'a> {
429        Path(self)
430    }
431
432    pub fn value_type(&self) -> EnumSet<ValueType> {
433        self.value_type
434    }
435
436    /// Returns the field attributes.
437    pub fn attrs(&self) -> &FieldAttrs {
438        match self.attrs {
439            Some(ref cow) => cow,
440            None => &DEFAULT_FIELD_ATTRS,
441        }
442    }
443
444    /// Derives the attrs for recursion.
445    pub fn inner_attrs(&self) -> Option<Cow<'_, FieldAttrs>> {
446        match self.pii() {
447            Pii::True => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
448            Pii::False => None,
449            Pii::Maybe => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
450        }
451    }
452
453    /// Returns the PII status for this state.
454    ///
455    /// If the state's `FieldAttrs` contain a fixed PII status,
456    /// it is returned. If they contain a dynamic PII status (a function),
457    /// it is applied to this state and the output returned.
458    pub fn pii(&self) -> Pii {
459        match self.attrs().pii {
460            PiiMode::Static(pii) => pii,
461            PiiMode::Dynamic(pii_fn) => pii_fn(self),
462        }
463    }
464
465    /// Iterates through this state and all its ancestors up the hierarchy.
466    ///
467    /// This starts at the top of the stack of processing states and ends at the root.  Thus
468    /// the first item returned is the currently visited leaf of the event structure.
469    pub fn iter(&'a self) -> ProcessingStateIter<'a> {
470        ProcessingStateIter {
471            state: Some(self),
472            size: self.depth,
473        }
474    }
475
476    /// Returns the contained parent state.
477    ///
478    /// - Returns `Ok(None)` if the current state is the root.
479    /// - Returns `Err(self)` if the current state does not own the parent state.
480    #[expect(
481        clippy::result_large_err,
482        reason = "this method returns `self` in the error case"
483    )]
484    pub fn try_into_parent(self) -> Result<Option<Self>, Self> {
485        match self.parent {
486            Some(BoxCow::Borrowed(_)) => Err(self),
487            Some(BoxCow::Owned(parent)) => Ok(Some(*parent)),
488            None => Ok(None),
489        }
490    }
491
492    /// Return the depth (~ indentation level) of the currently processed value.
493    pub fn depth(&'a self) -> usize {
494        self.depth
495    }
496
497    /// Return whether the depth changed between parent and self.
498    ///
499    /// This is `false` when we entered a newtype struct.
500    pub fn entered_anything(&'a self) -> bool {
501        if let Some(parent) = &self.parent {
502            parent.depth() != self.depth()
503        } else {
504            true
505        }
506    }
507
508    /// Returns an iterator over the "keys" in this state,
509    /// in order from right to left (or innermost state to outermost).
510    pub fn keys(&self) -> impl Iterator<Item = &str> {
511        self.iter()
512            .filter_map(|state| state.path_item.as_ref())
513            .flat_map(|item| item.key())
514    }
515
516    /// Returns the last path item if there is one. Skips over "dummy" path segments that exist
517    /// because of newtypes.
518    #[inline]
519    fn path_item(&self) -> Option<&PathItem<'_>> {
520        for state in self.iter() {
521            if let Some(ref path_item) = state.path_item {
522                return Some(path_item);
523            }
524        }
525        None
526    }
527}
528
529pub struct ProcessingStateIter<'a> {
530    state: Option<&'a ProcessingState<'a>>,
531    size: usize,
532}
533
534impl<'a> Iterator for ProcessingStateIter<'a> {
535    type Item = &'a ProcessingState<'a>;
536
537    fn next(&mut self) -> Option<Self::Item> {
538        let current = self.state?;
539        self.state = current.parent.as_deref();
540        Some(current)
541    }
542
543    fn size_hint(&self) -> (usize, Option<usize>) {
544        (self.size, Some(self.size))
545    }
546}
547
548impl ExactSizeIterator for ProcessingStateIter<'_> {}
549
550impl Default for ProcessingState<'_> {
551    fn default() -> Self {
552        ProcessingState::root().clone()
553    }
554}
555
556/// Represents the [`ProcessingState`] as a path.
557///
558/// This is a view of a [`ProcessingState`] which treats the stack of states as a path.
559#[derive(Debug)]
560pub struct Path<'a>(&'a ProcessingState<'a>);
561
562impl Path<'_> {
563    /// Returns the current key if there is one
564    #[inline]
565    pub fn key(&self) -> Option<&str> {
566        PathItem::key(self.0.path_item()?)
567    }
568
569    /// Returns the current index if there is one
570    #[inline]
571    pub fn index(&self) -> Option<usize> {
572        PathItem::index(self.0.path_item()?)
573    }
574
575    /// Return the depth (~ indentation level) of the currently processed value.
576    pub fn depth(&self) -> usize {
577        self.0.depth()
578    }
579
580    /// Returns the field attributes of the current path item.
581    pub fn attrs(&self) -> &FieldAttrs {
582        self.0.attrs()
583    }
584
585    /// Returns the PII status for this path.
586    pub fn pii(&self) -> Pii {
587        self.0.pii()
588    }
589
590    /// Iterates through the states in this path.
591    pub fn iter(&self) -> ProcessingStateIter<'_> {
592        self.0.iter()
593    }
594}
595
596impl fmt::Display for Path<'_> {
597    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
598        let mut items = Vec::with_capacity(self.0.depth);
599        for state in self.0.iter() {
600            if let Some(ref path_item) = state.path_item {
601                items.push(path_item)
602            }
603        }
604
605        for (idx, item) in items.into_iter().rev().enumerate() {
606            if idx > 0 {
607                write!(f, ".")?;
608            }
609            write!(f, "{item}")?;
610        }
611        Ok(())
612    }
613}
614
#[cfg(test)]
mod tests {
    use relay_protocol::{Annotated, Empty, FromValue, IntoValue, Object, SerializableAnnotated};

    use crate::processor::attrs::ROOT_STATE;
    use crate::processor::{Pii, ProcessValue, ProcessingState, Processor, process_value};

    /// Dynamic PII function: derives the PII status from the key of the last path item.
    fn pii_from_item_name(state: &ProcessingState) -> Pii {
        let key = state.path_item().and_then(|item| item.key());
        match key {
            Some("true_item") => Pii::True,
            Some("false_item") => Pii::False,
            _ => Pii::Maybe,
        }
    }

    /// String newtype whose PII status is computed by `pii_from_item_name`.
    #[derive(Debug, Clone, Empty, IntoValue, FromValue, ProcessValue)]
    #[metastructure(pii = "pii_from_item_name")]
    struct TestValue(String);

    /// Processor that overwrites every string with the name of the PII status of its state.
    struct TestProcessor;

    impl Processor for TestProcessor {
        fn process_string(
            &mut self,
            value: &mut String,
            _meta: &mut relay_protocol::Meta,
            state: &ProcessingState<'_>,
        ) -> crate::processor::ProcessingResult {
            let replacement = match state.pii() {
                Pii::True => "true",
                Pii::False => "false",
                Pii::Maybe => "maybe",
            };
            *value = replacement.to_owned();
            Ok(())
        }
    }

    #[test]
    fn test_dynamic_pii() {
        let mut object: Annotated<Object<TestValue>> = Annotated::from_json(
            r#"
        {
          "false_item": "replace me",
          "other_item": "replace me",
          "true_item": "replace me"
        }
        "#,
        )
        .unwrap();

        process_value(&mut object, &mut TestProcessor, &ROOT_STATE).unwrap();

        insta::assert_json_snapshot!(SerializableAnnotated(&object), @r###"
        {
          "false_item": "false",
          "other_item": "maybe",
          "true_item": "true"
        }
        "###);
    }
}