relay_event_schema/processor/
attrs.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::ops::{Deref, RangeInclusive};
4
5use enumset::{EnumSet, EnumSetType};
6use relay_protocol::Annotated;
7
8use crate::processor::ProcessValue;
9
/// Error for a value type name that is not recognized.
///
/// Used as the error type handed to `derive_fromstr_and_display!` for `ValueType`.
#[derive(Debug)]
pub struct UnknownValueTypeError;

impl std::error::Error for UnknownValueTypeError {}

impl fmt::Display for UnknownValueTypeError {
    /// Writes the fixed message `unknown value type`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("unknown value type")
    }
}
21
/// The (simplified) type of a value.
///
/// The enum derives `EnumSetType`, so variants are combined into an `EnumSet<ValueType>`;
/// a single value can therefore be described by several types at once.
///
/// The derived `Ord`/`PartialOrd` follow the declaration order below, so reordering
/// variants changes how they compare.
#[derive(Debug, Ord, PartialOrd, EnumSetType)]
pub enum ValueType {
    // Basic types
    String,
    Binary,
    Number,
    Boolean,
    DateTime,
    Array,
    Object,

    // Roots
    Event,
    Attachments,
    Replay,

    // Protocol types
    Exception,
    Stacktrace,
    Frame,
    Request,
    User,
    LogEntry,
    Message,
    Thread,
    Breadcrumb,
    OurLog,
    Span,
    ClientSdkInfo,

    // Attachments and Contents
    Minidump,
    HeapMemory,
    StackMemory,
}
58
59impl ValueType {
60    pub fn for_field<T: ProcessValue>(field: &Annotated<T>) -> EnumSet<Self> {
61        field
62            .value()
63            .map(ProcessValue::value_type)
64            .unwrap_or_else(EnumSet::empty)
65    }
66}
67
// String names for `ValueType`, with `UnknownValueTypeError` as the error type.
//
// NOTE(review): judging by the macro name, this generates the `FromStr` and `Display`
// impls. Several variants list more than one literal (e.g. `"boolean" | "bool"`);
// presumably all of them are accepted when parsing — confirm against the macro
// definition which literal is used for display.
relay_common::derive_fromstr_and_display!(ValueType, UnknownValueTypeError, {
    ValueType::String => "string",
    ValueType::Binary => "binary",
    ValueType::Number => "number",
    ValueType::Boolean => "boolean" | "bool",
    ValueType::DateTime => "datetime",
    ValueType::Array => "array" | "list",
    ValueType::Object => "object",
    ValueType::Event => "event",
    ValueType::Attachments => "attachments",
    ValueType::Replay => "replay",
    ValueType::Exception => "error" | "exception",
    ValueType::Stacktrace => "stack" | "stacktrace",
    ValueType::Frame => "frame",
    ValueType::Request => "http" | "request",
    ValueType::User => "user",
    ValueType::LogEntry => "logentry",
    ValueType::Message => "message",
    ValueType::Thread => "thread",
    ValueType::Breadcrumb => "breadcrumb",
    ValueType::OurLog => "ourlog",
    ValueType::Span => "span",
    ValueType::ClientSdkInfo => "sdk",
    ValueType::Minidump => "minidump",
    ValueType::HeapMemory => "heap_memory",
    ValueType::StackMemory => "stack_memory",
});
95
/// Whether an attribute is PII-strippable, i.e. whether it is subject to data scrubbers.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Pii {
    /// The field will be stripped by default.
    True,
    /// The field cannot be stripped at all.
    False,
    /// The field will only be stripped when addressed with a specific path selector; generic
    /// selectors such as `$string` do not apply.
    Maybe,
}
107
/// Meta information about a field.
///
/// The type is `Copy` and all fields are public, so values can be built either with a
/// struct literal or by chaining the `const fn` builder methods on `FieldAttrs::new()`.
#[derive(Debug, Clone, Copy)]
pub struct FieldAttrs {
    /// Optionally the name of the field.
    pub name: Option<&'static str>,
    /// If the field is required.
    pub required: bool,
    /// If the field should be non-empty.
    pub nonempty: bool,
    /// Whether to trim whitespace from this string.
    pub trim_whitespace: bool,
    /// A set of allowed or denied character ranges for this string.
    pub characters: Option<CharacterSet>,
    /// The maximum char length of this field.
    pub max_chars: Option<usize>,
    /// The extra char length allowance on top of `max_chars`.
    pub max_chars_allowance: usize,
    /// The maximum depth of this field.
    pub max_depth: Option<usize>,
    /// The maximum number of bytes of this field.
    pub max_bytes: Option<usize>,
    /// The type of PII on the field.
    pub pii: Pii,
    /// Whether additional properties should be retained during normalization.
    pub retain: bool,
    /// Whether the trimming processor is allowed to shorten or drop this field.
    pub trim: bool,
}
136
/// A set of characters allowed or denied for a (string) field.
///
/// Note that this type is generated in the derive, it can't be constructed easily in tests.
#[derive(Clone, Copy)]
pub struct CharacterSet {
    /// Predicate deciding whether a single character is valid. Generated in the derive for
    /// performance.
    pub char_is_valid: fn(char) -> bool,
    /// The ranges that are allowed/denied within the character set.
    pub ranges: &'static [RangeInclusive<char>],
    /// Whether the character set is inverted.
    pub is_negative: bool,
}

impl fmt::Debug for CharacterSet {
    /// Formats `ranges` and `is_negative` only; the `char_is_valid` function pointer is
    /// deliberately left out of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            char_is_valid: _,
            ranges,
            is_negative,
        } = self;

        let mut builder = f.debug_struct("CharacterSet");
        builder.field("ranges", ranges);
        builder.field("is_negative", is_negative);
        builder.finish()
    }
}
158
159impl FieldAttrs {
160    /// Creates default `FieldAttrs`.
161    pub const fn new() -> Self {
162        FieldAttrs {
163            name: None,
164            required: false,
165            nonempty: false,
166            trim_whitespace: false,
167            characters: None,
168            max_chars: None,
169            max_chars_allowance: 0,
170            max_depth: None,
171            max_bytes: None,
172            pii: Pii::False,
173            retain: false,
174            trim: true,
175        }
176    }
177
178    /// Sets whether a value in this field is required.
179    pub const fn required(mut self, required: bool) -> Self {
180        self.required = required;
181        self
182    }
183
184    /// Sets whether this field can have an empty value.
185    ///
186    /// This is distinct from `required`. An empty string (`""`) passes the "required" check but not the
187    /// "nonempty" one.
188    pub const fn nonempty(mut self, nonempty: bool) -> Self {
189        self.nonempty = nonempty;
190        self
191    }
192
193    /// Sets whether whitespace should be trimmed before validation.
194    pub const fn trim_whitespace(mut self, trim_whitespace: bool) -> Self {
195        self.trim_whitespace = trim_whitespace;
196        self
197    }
198
199    /// Sets whether this field contains PII.
200    pub const fn pii(mut self, pii: Pii) -> Self {
201        self.pii = pii;
202        self
203    }
204
205    /// Sets the maximum number of characters allowed in the field.
206    pub const fn max_chars(mut self, max_chars: usize) -> Self {
207        self.max_chars = Some(max_chars);
208        self
209    }
210
211    /// Sets whether additional properties should be retained during normalization.
212    pub const fn retain(mut self, retain: bool) -> Self {
213        self.retain = retain;
214        self
215    }
216}
217
// Shared attribute sets that can be handed out by reference instead of building new values.
//
// `DEFAULT_FIELD_ATTRS` is what `ProcessingState::attrs` returns for a state without attrs;
// the two PII variants are what `ProcessingState::inner_attrs` returns for `Pii::True` and
// `Pii::Maybe` respectively.
static DEFAULT_FIELD_ATTRS: FieldAttrs = FieldAttrs::new();
static PII_TRUE_FIELD_ATTRS: FieldAttrs = FieldAttrs::new().pii(Pii::True);
static PII_MAYBE_FIELD_ATTRS: FieldAttrs = FieldAttrs::new().pii(Pii::Maybe);
221
222impl Default for FieldAttrs {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
/// A single segment of a processing path: either a key or a positional index.
///
/// `StaticKey` and `OwnedKey` differ only in how the key string is stored. Equality and
/// ordering both ignore that distinction, so `StaticKey("a")` and `OwnedKey("a".into())`
/// are equal and compare as `Ordering::Equal`.
///
/// `PartialOrd`/`Ord` are implemented by hand rather than derived: the derived versions
/// order by variant first and would disagree with the manual `PartialEq` below.
#[derive(Debug, Clone, Eq)]
enum PathItem<'a> {
    /// A key borrowed for `'a`.
    StaticKey(&'a str),
    /// A key owned by this path item.
    OwnedKey(String),
    /// A positional index.
    Index(usize),
}

impl<'a> PartialEq for PathItem<'a> {
    /// Two items are equal when both their keys and their indexes match, regardless of
    /// whether a key is borrowed or owned.
    fn eq(&self, other: &PathItem<'a>) -> bool {
        self.key() == other.key() && self.index() == other.index()
    }
}

impl<'a> PartialOrd for PathItem<'a> {
    fn partial_cmp(&self, other: &PathItem<'a>) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for PathItem<'_> {
    /// Orders keys by their string contents and indexes numerically; any key sorts before
    /// any index (matching the variant order the derive used to give).
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        match (self.key(), other.key()) {
            (Some(lhs), Some(rhs)) => lhs.cmp(rhs),
            (Some(_), None) => Ordering::Less,
            (None, Some(_)) => Ordering::Greater,
            // Neither side has a key, so both are `Index` items.
            (None, None) => self.index().cmp(&other.index()),
        }
    }
}

impl PathItem<'_> {
    /// Returns the key if there is one.
    #[inline]
    pub fn key(&self) -> Option<&str> {
        match self {
            PathItem::StaticKey(s) => Some(s),
            PathItem::OwnedKey(s) => Some(s.as_str()),
            PathItem::Index(_) => None,
        }
    }

    /// Returns the index if there is one.
    #[inline]
    pub fn index(&self) -> Option<usize> {
        match self {
            PathItem::StaticKey(_) | PathItem::OwnedKey(_) => None,
            PathItem::Index(idx) => Some(*idx),
        }
    }
}

impl fmt::Display for PathItem<'_> {
    /// Writes the key (honoring the formatter's padding options) or the index.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PathItem::StaticKey(s) => f.pad(s),
            PathItem::OwnedKey(s) => f.pad(s.as_str()),
            PathItem::Index(val) => write!(f, "{val}"),
        }
    }
}
271
/// Like [`std::borrow::Cow`], but the owned variant stores its value in a [`Box`].
///
/// The indirection makes it usable in types that contain themselves, where the layout of
/// the type could otherwise not be computed, for example
///
/// ```rust,ignore
/// struct Foo<'a>(Cow<'a, Foo<'a>>); // will not compile
/// struct Bar<'a>(BoxCow<'a, Bar<'a>>); // will compile
/// ```
#[derive(Debug, Clone)]
enum BoxCow<'a, T> {
    /// A value borrowed for `'a`.
    Borrowed(&'a T),
    /// A heap-allocated value owned by this `BoxCow`.
    Owned(Box<T>),
}

impl<T> Deref for BoxCow<'_, T> {
    type Target = T;

    /// Gives shared access to the wrapped value, whichever variant holds it.
    fn deref(&self) -> &Self::Target {
        match *self {
            BoxCow::Borrowed(borrowed) => borrowed,
            BoxCow::Owned(ref boxed) => &**boxed,
        }
    }
}
297
/// An event's processing state.
///
/// The processing state describes an item in an event which is being processed; an example
/// of processing might be scrubbing the event for PII.  The processing state itself
/// describes the current item and its parent, which allows you to follow all the items up
/// to the root item.  You can think of processing an event as a visitor pattern visiting
/// all items in the event, and the processing state is a stack describing the currently
/// visited item and all its parents.
#[derive(Debug, Clone)]
pub struct ProcessingState<'a> {
    // In event scrubbing, every state holds a reference to its parent.
    // In Replay scrubbing, we do not call `process_*` recursively,
    // but instead hold a single `ProcessingState` that represents the current item.
    // This item owns its parent (plus ancestors) exclusively, which is why we use `BoxCow` here
    // rather than `Rc` / `Arc`.
    parent: Option<BoxCow<'a, ProcessingState<'a>>>,
    // The path segment that led to this state; `None` for roots and for states created
    // without adding a path segment.
    path_item: Option<PathItem<'a>>,
    // Attributes of the current field; `None` means the default attrs apply.
    attrs: Option<Cow<'a, FieldAttrs>>,
    // The set of value types describing the current item.
    value_type: EnumSet<ValueType>,
    // Nesting depth; `0` for roots.
    depth: usize,
}
319
// The shared root state returned by `ProcessingState::root`: no parent, no path item,
// no attrs, an empty value type set and depth zero.
static ROOT_STATE: ProcessingState = ProcessingState {
    parent: None,
    path_item: None,
    attrs: None,
    value_type: enumset::enum_set!(),
    depth: 0,
};
327
328impl<'a> ProcessingState<'a> {
329    /// Returns the root processing state.
330    pub fn root() -> &'static ProcessingState<'static> {
331        &ROOT_STATE
332    }
333
334    /// Creates a new root state.
335    pub fn new_root(
336        attrs: Option<Cow<'static, FieldAttrs>>,
337        value_type: impl IntoIterator<Item = ValueType>,
338    ) -> ProcessingState<'static> {
339        ProcessingState {
340            parent: None,
341            path_item: None,
342            attrs,
343            value_type: value_type.into_iter().collect(),
344            depth: 0,
345        }
346    }
347
348    /// Derives a processing state by entering a static key.
349    pub fn enter_static(
350        &'a self,
351        key: &'static str,
352        attrs: Option<Cow<'static, FieldAttrs>>,
353        value_type: impl IntoIterator<Item = ValueType>,
354    ) -> Self {
355        ProcessingState {
356            parent: Some(BoxCow::Borrowed(self)),
357            path_item: Some(PathItem::StaticKey(key)),
358            attrs,
359            value_type: value_type.into_iter().collect(),
360            depth: self.depth + 1,
361        }
362    }
363
364    /// Derives a processing state by entering a borrowed key.
365    pub fn enter_borrowed(
366        &'a self,
367        key: &'a str,
368        attrs: Option<Cow<'a, FieldAttrs>>,
369        value_type: impl IntoIterator<Item = ValueType>,
370    ) -> Self {
371        ProcessingState {
372            parent: Some(BoxCow::Borrowed(self)),
373            path_item: Some(PathItem::StaticKey(key)),
374            attrs,
375            value_type: value_type.into_iter().collect(),
376            depth: self.depth + 1,
377        }
378    }
379
380    /// Derives a processing state by entering an owned key.
381    ///
382    /// The new (child) state takes ownership of the current (parent) state.
383    pub fn enter_owned(
384        self,
385        key: String,
386        attrs: Option<Cow<'a, FieldAttrs>>,
387        value_type: impl IntoIterator<Item = ValueType>,
388    ) -> Self {
389        let depth = self.depth + 1;
390        ProcessingState {
391            parent: Some(BoxCow::Owned(self.into())),
392            path_item: Some(PathItem::OwnedKey(key)),
393            attrs,
394            value_type: value_type.into_iter().collect(),
395            depth,
396        }
397    }
398
399    /// Derives a processing state by entering an index.
400    pub fn enter_index(
401        &'a self,
402        idx: usize,
403        attrs: Option<Cow<'a, FieldAttrs>>,
404        value_type: impl IntoIterator<Item = ValueType>,
405    ) -> Self {
406        ProcessingState {
407            parent: Some(BoxCow::Borrowed(self)),
408            path_item: Some(PathItem::Index(idx)),
409            attrs,
410            value_type: value_type.into_iter().collect(),
411            depth: self.depth + 1,
412        }
413    }
414
415    /// Derives a processing state without adding a path segment. Useful in newtype structs.
416    pub fn enter_nothing(&'a self, attrs: Option<Cow<'a, FieldAttrs>>) -> Self {
417        ProcessingState {
418            attrs,
419            path_item: None,
420            parent: Some(BoxCow::Borrowed(self)),
421            ..self.clone()
422        }
423    }
424
425    /// Returns the path in the processing state.
426    pub fn path(&'a self) -> Path<'a> {
427        Path(self)
428    }
429
430    pub fn value_type(&self) -> EnumSet<ValueType> {
431        self.value_type
432    }
433
434    /// Returns the field attributes.
435    pub fn attrs(&self) -> &FieldAttrs {
436        match self.attrs {
437            Some(ref cow) => cow,
438            None => &DEFAULT_FIELD_ATTRS,
439        }
440    }
441
442    /// Derives the attrs for recursion.
443    pub fn inner_attrs(&self) -> Option<Cow<'_, FieldAttrs>> {
444        match self.attrs().pii {
445            Pii::True => Some(Cow::Borrowed(&PII_TRUE_FIELD_ATTRS)),
446            Pii::False => None,
447            Pii::Maybe => Some(Cow::Borrowed(&PII_MAYBE_FIELD_ATTRS)),
448        }
449    }
450
451    /// Iterates through this state and all its ancestors up the hierarchy.
452    ///
453    /// This starts at the top of the stack of processing states and ends at the root.  Thus
454    /// the first item returned is the currently visited leaf of the event structure.
455    pub fn iter(&'a self) -> ProcessingStateIter<'a> {
456        ProcessingStateIter {
457            state: Some(self),
458            size: self.depth,
459        }
460    }
461
462    /// Returns the contained parent state.
463    ///
464    /// - Returns `Ok(None)` if the current state is the root.
465    /// - Returns `Err(self)` if the current state does not own the parent state.
466    pub fn try_into_parent(self) -> Result<Option<Self>, Self> {
467        match self.parent {
468            Some(BoxCow::Borrowed(_)) => Err(self),
469            Some(BoxCow::Owned(parent)) => Ok(Some(*parent)),
470            None => Ok(None),
471        }
472    }
473
474    /// Return the depth (~ indentation level) of the currently processed value.
475    pub fn depth(&'a self) -> usize {
476        self.depth
477    }
478
479    /// Return whether the depth changed between parent and self.
480    ///
481    /// This is `false` when we entered a newtype struct.
482    pub fn entered_anything(&'a self) -> bool {
483        if let Some(parent) = &self.parent {
484            parent.depth() != self.depth()
485        } else {
486            true
487        }
488    }
489
490    /// Returns the last path item if there is one. Skips over "dummy" path segments that exist
491    /// because of newtypes.
492    #[inline]
493    fn path_item(&self) -> Option<&PathItem<'_>> {
494        for state in self.iter() {
495            if let Some(ref path_item) = state.path_item {
496                return Some(path_item);
497            }
498        }
499        None
500    }
501}
502
503pub struct ProcessingStateIter<'a> {
504    state: Option<&'a ProcessingState<'a>>,
505    size: usize,
506}
507
508impl<'a> Iterator for ProcessingStateIter<'a> {
509    type Item = &'a ProcessingState<'a>;
510
511    fn next(&mut self) -> Option<Self::Item> {
512        let current = self.state?;
513        self.state = current.parent.as_deref();
514        Some(current)
515    }
516
517    fn size_hint(&self) -> (usize, Option<usize>) {
518        (self.size, Some(self.size))
519    }
520}
521
522impl ExactSizeIterator for ProcessingStateIter<'_> {}
523
524impl Default for ProcessingState<'_> {
525    fn default() -> Self {
526        ProcessingState::root().clone()
527    }
528}
529
/// Represents the [`ProcessingState`] as a path.
///
/// This is a borrowed view of a [`ProcessingState`] which treats the stack of states as a
/// path; its `Display` output joins the path items from the root to the current state
/// with `.`.
#[derive(Debug)]
pub struct Path<'a>(&'a ProcessingState<'a>);
535
536impl Path<'_> {
537    /// Returns the current key if there is one
538    #[inline]
539    pub fn key(&self) -> Option<&str> {
540        PathItem::key(self.0.path_item()?)
541    }
542
543    /// Returns the current index if there is one
544    #[inline]
545    pub fn index(&self) -> Option<usize> {
546        PathItem::index(self.0.path_item()?)
547    }
548
549    /// Return the depth (~ indentation level) of the currently processed value.
550    pub fn depth(&self) -> usize {
551        self.0.depth()
552    }
553
554    /// Returns the field attributes of the current path item.
555    pub fn attrs(&self) -> &FieldAttrs {
556        self.0.attrs()
557    }
558
559    /// Iterates through the states in this path.
560    pub fn iter(&self) -> ProcessingStateIter<'_> {
561        self.0.iter()
562    }
563}
564
565impl fmt::Display for Path<'_> {
566    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
567        let mut items = Vec::with_capacity(self.0.depth);
568        for state in self.0.iter() {
569            if let Some(ref path_item) = state.path_item {
570                items.push(path_item)
571            }
572        }
573
574        for (idx, item) in items.into_iter().rev().enumerate() {
575            if idx > 0 {
576                write!(f, ".")?;
577            }
578            write!(f, "{item}")?;
579        }
580        Ok(())
581    }
582}