relay_pii/
selector.rs

1use std::fmt;
2use std::str::FromStr;
3
4use pest::error::Error;
5use pest::iterators::Pair;
6use pest::Parser;
7use relay_event_schema::processor::Path;
8use smallvec::SmallVec;
9
10use relay_event_schema::processor::{Pii, ProcessingState, ValueType};
11
12/// Error for invalid PII selectors.
13#[derive(Debug, thiserror::Error)]
14pub enum InvalidSelectorError {
15    /// Deep wildcard used more than once.
16    #[error("deep wildcard used more than once")]
17    InvalidDeepWildcard,
18
19    /// Wildcard must be part of a path.
20    #[error("wildcard must be part of a path")]
21    InvalidWildcard,
22
23    /// Invalid selector syntax.
24    #[error("{0}")]
25    ParseError(Box<Error<Rule>>),
26
27    /// Invalid index.
28    #[error("invalid index")]
29    InvalidIndex,
30
31    /// Unknown value.
32    #[error("unknown value")]
33    UnknownType,
34
35    /// Internal parser bug: An unexpected item was consumed.
36    #[error("parser bug: consumed {0} (expected {1})")]
37    UnexpectedToken(String, &'static str),
38
39    /// Internal parsing error, this should never happen and is a bug that needs to be fixed.
40    #[error("internal parser error")]
41    InternalError,
42}
43
44#[allow(unknown_lints)]
45#[allow(clippy::upper_case_acronyms)]
46#[allow(clippy::empty_docs)]
47mod parser {
48    use pest_derive::Parser;
49
50    #[derive(Parser)]
51    #[grammar = "selector.pest"]
52    pub struct SelectorParser;
53}
54
55use self::parser::{Rule, SelectorParser};
56
57/// A path component in a composit [`SelectorSpec`].
58#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
59pub enum SelectorPathItem {
60    /// The component refers to a value type.
61    Type(ValueType),
62    /// The component refers to an array index.
63    Index(usize),
64    /// The component refers to a key in an object.
65    Key(String),
66    /// The component is a shallow wildcard (`*`).
67    Wildcard,
68    /// The component is a deep wildcard (`**`).
69    DeepWildcard,
70}
71
72impl fmt::Display for SelectorPathItem {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        match *self {
75            SelectorPathItem::Type(ty) => write!(f, "${ty}"),
76            SelectorPathItem::Index(index) => write!(f, "{index}"),
77            SelectorPathItem::Key(ref key) => {
78                if key_needs_quoting(key) {
79                    write!(f, "'{}'", key.replace('\'', "''"))
80                } else {
81                    write!(f, "{key}")
82                }
83            }
84            SelectorPathItem::Wildcard => write!(f, "*"),
85            SelectorPathItem::DeepWildcard => write!(f, "**"),
86        }
87    }
88}
89
90impl SelectorPathItem {
91    /// Determine whether a path item matches the respective processing state.
92    ///
93    /// `pii` is not the same as `state.attrs().pii`, but rather the PII flag of the state we're
94    /// actually trying to match against. `i` is the position of the path item within the path.
95    pub(super) fn matches_state(&self, pii: Pii, i: usize, state: &ProcessingState<'_>) -> bool {
96        match (self, pii) {
97            (_, Pii::False) => false,
98
99            // necessary because of array indices
100            (SelectorPathItem::Wildcard, _) => true,
101
102            // a deep wildcard is too sweeping to be specific
103            (SelectorPathItem::DeepWildcard, Pii::True) => true,
104            (SelectorPathItem::DeepWildcard, Pii::Maybe) => false,
105
106            (SelectorPathItem::Type(ty), Pii::True) => state.value_type().contains(*ty),
107            (SelectorPathItem::Type(ty), Pii::Maybe) => {
108                state.value_type().contains(*ty)
109                    && match ty {
110                        // Basic value types cannot be part of a specific path
111                        ValueType::String
112                        | ValueType::Binary
113                        | ValueType::Number
114                        | ValueType::Boolean
115                        | ValueType::DateTime
116                        | ValueType::Array
117                        | ValueType::Object => false,
118
119                        // Other schema-specific value types can be if they are on the first
120                        // position. This list is explicitly typed out such that the decision
121                        // to add new value types to this list has to be made consciously.
122                        //
123                        // It's easy to change a `false` to `true` later, but a breaking change
124                        // to go the other direction. If you're not sure, return `false` for
125                        // your new value type.
126                        ValueType::Event
127                        | ValueType::Attachments
128                        | ValueType::Replay
129                        | ValueType::Exception
130                        | ValueType::Stacktrace
131                        | ValueType::Frame
132                        | ValueType::Request
133                        | ValueType::User
134                        | ValueType::LogEntry
135                        | ValueType::Message
136                        | ValueType::Thread
137                        | ValueType::Breadcrumb
138                        | ValueType::OurLog
139                        | ValueType::Span
140                        | ValueType::Minidump
141                        | ValueType::HeapMemory
142                        | ValueType::StackMemory
143                        | ValueType::ClientSdkInfo => i == 0,
144                    }
145            }
146            (SelectorPathItem::Index(idx), _) => state.path().index() == Some(*idx),
147            (SelectorPathItem::Key(ref key), _) => state
148                .path()
149                .key()
150                .map(|k| k.eq_ignore_ascii_case(key))
151                .unwrap_or(false),
152        }
153    }
154}
155
156/// A selector that can match paths of processing states.
157///
158/// To use a selector, you most likely want to check whether it matches the path of a
159/// [`ProcessingState`].  For this you turn the state into a [`Path`] using
160/// [`ProcessingState::path`] and call [`SelectorSpec::matches_path`], which will iterate through
161/// the path items in the processing state and check whether the selector matches.
162#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
163pub enum SelectorSpec {
164    /// A selector that matches both of two sub-selectors.
165    And(Vec<SelectorSpec>),
166    /// A selector that matches either of two sub-selectors.
167    Or(Vec<SelectorSpec>),
168    /// A selector that matches all paths that do not match the sub-selector.
169    Not(Box<SelectorSpec>),
170    /// A direct path to an item.
171    Path(Vec<SelectorPathItem>),
172}
173
174impl SelectorSpec {
175    /// Parses a selector from a string without legacy special handling.
176    pub fn parse_non_legacy(s: &str) -> Result<SelectorSpec, InvalidSelectorError> {
177        let mut selector = SelectorParser::parse(Rule::RootSelector, s)
178            .map_err(|e| InvalidSelectorError::ParseError(Box::new(e)))?;
179
180        // Extracts the first `OrSelector` spanning the entire selector from the `RootSelector`.
181        // The `RootSelector` is guaranteed to have exactly one `OrSelector`.
182        let Some(selector) = selector.next().and_then(|s| s.into_inner().next()) else {
183            // Internal parsing error, this should never happen.
184            // If this happens the pest file was modified without changing the code.
185            relay_log::error!(
186                selector = s,
187                "internal error parsing selector {s:?}, this is a bug!"
188            );
189            return Err(InvalidSelectorError::InternalError);
190        };
191
192        handle_selector(selector)
193    }
194
195    /// Checks if a path matches given selector.
196    ///
197    /// This walks both the selector and the path starting at the end and towards the root
198    /// to determine if the selector matches the current path.
199    pub fn matches_path(&self, path: &Path) -> bool {
200        let pii = path.attrs().pii;
201        if pii == Pii::False {
202            return false;
203        }
204
205        match *self {
206            SelectorSpec::Path(ref path_items) => {
207                // fastest path: the selector is deeper than the current structure.
208                if path_items.len() > path.depth() {
209                    return false;
210                }
211
212                // fast path: we do not have any deep matches
213                let mut state_iter = path.iter().filter(|state| state.entered_anything());
214                let mut selector_iter = path_items.iter().enumerate().rev();
215                let mut depth_match = false;
216                for state in &mut state_iter {
217                    match selector_iter.next() {
218                        Some((i, path_item)) => {
219                            if !path_item.matches_state(pii, i, state) {
220                                return false;
221                            }
222
223                            if matches!(path_item, SelectorPathItem::DeepWildcard) {
224                                depth_match = true;
225                                break;
226                            }
227                        }
228                        None => break,
229                    }
230                }
231
232                if !depth_match {
233                    return true;
234                }
235
236                // slow path: we collect the remaining states and skip up to the first
237                // match of the selector.
238                let remaining_states = state_iter.collect::<SmallVec<[&ProcessingState<'_>; 16]>>();
239                let mut selector_iter = selector_iter.rev().peekable();
240                let (first_selector_i, first_selector_path) = match selector_iter.next() {
241                    Some(selector_path) => selector_path,
242                    None => return !remaining_states.is_empty(),
243                };
244                let mut path_match_iterator = remaining_states.iter().rev().skip_while(|state| {
245                    !first_selector_path.matches_state(pii, first_selector_i, state)
246                });
247                if path_match_iterator.next().is_none() {
248                    return false;
249                }
250
251                // then we check all remaining items and that nothing is left of the selector
252                path_match_iterator
253                    .zip(&mut selector_iter)
254                    .all(|(state, (i, selector_path))| selector_path.matches_state(pii, i, state))
255                    && selector_iter.next().is_none()
256            }
257            SelectorSpec::And(ref xs) => xs.iter().all(|x| x.matches_path(path)),
258            SelectorSpec::Or(ref xs) => xs.iter().any(|x| x.matches_path(path)),
259            SelectorSpec::Not(ref x) => !x.matches_path(path),
260        }
261    }
262}
263
264impl fmt::Display for SelectorSpec {
265    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
266        match *self {
267            SelectorSpec::And(ref xs) => {
268                for (idx, x) in xs.iter().enumerate() {
269                    if idx > 0 {
270                        write!(f, " && ")?;
271                    }
272
273                    let needs_parens = match *x {
274                        SelectorSpec::And(_) => false,
275                        SelectorSpec::Or(_) => true,
276                        SelectorSpec::Not(_) => false,
277                        SelectorSpec::Path(_) => false,
278                    };
279
280                    if needs_parens {
281                        write!(f, "({x})")?;
282                    } else {
283                        write!(f, "{x}")?;
284                    }
285                }
286            }
287            SelectorSpec::Or(ref xs) => {
288                for (idx, x) in xs.iter().enumerate() {
289                    if idx > 0 {
290                        write!(f, " || ")?;
291                    }
292
293                    // OR has weakest precedence, so everything else binds stronger and does not
294                    // need parens
295
296                    write!(f, "{x}")?;
297                }
298            }
299            SelectorSpec::Not(ref x) => {
300                let needs_parens = match **x {
301                    SelectorSpec::And(_) => true,
302                    SelectorSpec::Or(_) => true,
303                    SelectorSpec::Not(_) => true,
304                    SelectorSpec::Path(_) => false,
305                };
306
307                if needs_parens {
308                    write!(f, "!({x})")?;
309                } else {
310                    write!(f, "!{x}")?;
311                }
312            }
313            SelectorSpec::Path(ref path) => {
314                for (idx, item) in path.iter().enumerate() {
315                    if idx > 0 {
316                        write!(f, ".")?;
317                    }
318                    write!(f, "{item}")?;
319                }
320            }
321        }
322        Ok(())
323    }
324}
325
326impl FromStr for SelectorSpec {
327    type Err = InvalidSelectorError;
328
329    fn from_str(s: &str) -> Result<Self, Self::Err> {
330        // these are temporary legacy selectors
331        match s {
332            "freeform" | "email" | "sensitive" | "text" => {
333                return Ok(SelectorSpec::Path(vec![SelectorPathItem::Type(
334                    ValueType::String,
335                )]));
336            }
337            "databag" | "container" => {
338                return Ok(SelectorSpec::Path(vec![SelectorPathItem::Type(
339                    ValueType::Object,
340                )]));
341            }
342            _ => {}
343        }
344
345        Self::parse_non_legacy(s)
346    }
347}
348
349relay_common::impl_str_serde!(SelectorSpec, "a selector");
350
351impl From<ValueType> for SelectorSpec {
352    fn from(value_type: ValueType) -> Self {
353        SelectorSpec::Path(vec![SelectorPathItem::Type(value_type)])
354    }
355}
356
357fn handle_selector(pair: Pair<Rule>) -> Result<SelectorSpec, InvalidSelectorError> {
358    fn map_multiple_or_inner<F>(
359        pair: Pair<Rule>,
360        f: F,
361    ) -> Result<SelectorSpec, InvalidSelectorError>
362    where
363        F: Fn(Vec<SelectorSpec>) -> SelectorSpec,
364    {
365        let mut iter = pair.into_inner().map(handle_selector).peekable();
366        let first = iter.next().unwrap()?;
367        if iter.peek().is_none() {
368            Ok(first)
369        } else {
370            let mut items = vec![first];
371            for item in iter {
372                items.push(item?);
373            }
374            Ok(f(items))
375        }
376    }
377
378    match pair.as_rule() {
379        Rule::ParenthesisOrPath | Rule::MaybeNotSelector => {
380            handle_selector(pair.into_inner().next().unwrap())
381        }
382        Rule::SelectorPath => {
383            let mut used_deep_wildcard = false;
384            let items: Vec<SelectorPathItem> = pair
385                .into_inner()
386                .map(|item| {
387                    let rv = handle_selector_path_item(item)?;
388                    if rv == SelectorPathItem::DeepWildcard {
389                        if used_deep_wildcard {
390                            return Err(InvalidSelectorError::InvalidDeepWildcard);
391                        } else {
392                            used_deep_wildcard = true;
393                        }
394                    }
395                    Ok(rv)
396                })
397                .collect::<Result<_, _>>()?;
398
399            if matches!(items.as_slice(), [SelectorPathItem::Wildcard]) {
400                return Err(InvalidSelectorError::InvalidWildcard);
401            }
402
403            Ok(SelectorSpec::Path(items))
404        }
405        Rule::AndSelector => map_multiple_or_inner(pair, SelectorSpec::And),
406        Rule::OrSelector => map_multiple_or_inner(pair, SelectorSpec::Or),
407        Rule::NotSelector => Ok(SelectorSpec::Not(Box::new(handle_selector(
408            pair.into_inner().next().unwrap(),
409        )?))),
410        rule => Err(InvalidSelectorError::UnexpectedToken(
411            format!("{rule:?}"),
412            "a selector",
413        )),
414    }
415}
416
417fn handle_selector_path_item(pair: Pair<Rule>) -> Result<SelectorPathItem, InvalidSelectorError> {
418    let pair = pair.into_inner().next().unwrap();
419    match pair.as_rule() {
420        Rule::ObjectType => Ok(SelectorPathItem::Type(
421            pair.as_str()[1..]
422                .parse()
423                .map_err(|_| InvalidSelectorError::UnknownType)?,
424        )),
425        Rule::Wildcard => Ok(SelectorPathItem::Wildcard),
426        Rule::DeepWildcard => Ok(SelectorPathItem::DeepWildcard),
427        Rule::Index => Ok(SelectorPathItem::Index(
428            pair.as_str()
429                .parse()
430                .map_err(|_| InvalidSelectorError::InvalidIndex)?,
431        )),
432        Rule::Key => Ok(SelectorPathItem::Key(handle_key(pair)?)),
433        rule => Err(InvalidSelectorError::UnexpectedToken(
434            format!("{rule:?}"),
435            "a selector path item",
436        )),
437    }
438}
439
440fn handle_key(pair: Pair<Rule>) -> Result<String, InvalidSelectorError> {
441    let pair = pair.into_inner().next().unwrap();
442    match pair.as_rule() {
443        Rule::UnquotedKey => Ok(pair.as_str().to_owned()),
444        Rule::QuotedKey => Ok({
445            let mut key = String::new();
446            for token in pair.into_inner() {
447                key.push_str(token.as_str());
448            }
449            key
450        }),
451        rule => Err(InvalidSelectorError::UnexpectedToken(
452            format!("{rule:?}"),
453            "a key",
454        )),
455    }
456}
457
458fn key_needs_quoting(key: &str) -> bool {
459    SelectorParser::parse(Rule::RootUnquotedKey, key).is_err()
460}
461
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use relay_event_schema::processor::FieldAttrs;

    use super::*;

    /// Parsing a selector and printing it again must reproduce the input.
    #[test]
    fn test_roundtrip() {
        fn check_roundtrip(s: &str) {
            assert_eq!(SelectorSpec::from_str(s).unwrap().to_string(), s);
        }

        check_roundtrip("!(!a)");
        check_roundtrip("!a || !b");
        check_roundtrip("!a && !b");
        check_roundtrip("!(a && !b)");
        check_roundtrip("!(a && b)");
    }

    /// Selectors that pass the grammar but violate the wildcard rules are rejected.
    #[test]
    fn test_invalid() {
        assert!(matches!(
            SelectorSpec::from_str("* && foo"),
            Err(InvalidSelectorError::InvalidWildcard)
        ));
        assert!(matches!(
            SelectorSpec::from_str("$frame.**.foo.**"),
            Err(InvalidSelectorError::InvalidDeepWildcard)
        ));
    }

    // Parses `$selector` and asserts that matching it against the path of `$state` yields
    // `$expected`.
    macro_rules! assert_matches_raw {
        ($state:expr, $selector:expr, $expected:expr) => {{
            let selector: SelectorSpec = $selector.parse().unwrap();
            let actual = selector.matches_path(&$state.path());
            assert!(
                actual == $expected,
                "Matched {} against {}, expected {:?}, actually {:?}",
                $selector,
                $state.path(),
                $expected,
                actual
            );
        }};
    }

    // Asserts that all selectors match the state with `Pii::True` as well as with `Pii::Maybe`,
    // both individually and combined with `&&` and `||`.
    macro_rules! assert_matches_pii_maybe {
        ($state:expr, $first:expr, $($selector:expr,)*) => {{
            assert_matches_pii_true!($state, $first, $($selector,)*);
            let state = &$state;
            let state = state.enter_nothing(Some(Cow::Owned(FieldAttrs::new().pii(Pii::Maybe))));

            assert_matches_raw!(state, $first, true);
            $(
                assert_matches_raw!(state, $selector, true);
            )*

            let joined = concat!($first, $(" && ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!($first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!("** || ", $first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);
        }}
    }

    // Asserts that all selectors match the state with `Pii::True`, both individually and
    // combined with `&&` and `||`.
    macro_rules! assert_matches_pii_true {
        ($state:expr, $first:expr, $($selector:expr,)*) => {{
            let state = &$state;
            let state = state.enter_nothing(Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))));

            assert_matches_raw!(state, $first, true);
            $(
                assert_matches_raw!(state, $selector, true);
            )*

            let joined = concat!($first, $(" && ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!($first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!("** || ", $first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);
        }}
    }

    // Asserts that none of the selectors matches the state as given.
    macro_rules! assert_not_matches {
        ($state:expr, $($selector:expr,)*) => {{
            let state = &$state;
            $(
                assert_matches_raw!(state, $selector, false);
            )*
        }}
    }

    #[test]
    fn test_matching() {
        let event_state = ProcessingState::new_root(None, Some(ValueType::Event)); // .
        let user_state = event_state.enter_static("user", None, Some(ValueType::User)); // .user
        let extra_state = user_state.enter_static("extra", None, Some(ValueType::Object)); // .user.extra
        let foo_state = extra_state.enter_static("foo", None, Some(ValueType::Array)); // .user.extra.foo
        let zero_state = foo_state.enter_index(0, None, None); // .user.extra.foo.0

        assert_matches_pii_maybe!(
            extra_state,
            "user.extra",  // this is an exact match to the state
            "$user.extra", // this is a match below a type
            "(** || user.*) && !(foo.bar.baz || a.b.c)",
        );

        assert_matches_pii_true!(
            extra_state,
            // known limitation: double-negations *could* be specific (I'd expect this as a user), but
            // right now we don't support it
            "!(!user.extra)",
            "!(!$user.extra)",
        );

        assert_matches_pii_maybe!(
            foo_state,
            "$user.extra.*", // this is a wildcard match into a type
        );

        assert_matches_pii_maybe!(
            zero_state,
            "$user.extra.foo.*", // a wildcard match into an array
            "$user.extra.foo.0", // a direct match into an array
        );

        assert_matches_pii_true!(
            zero_state,
            // deep matches are wild
            "$user.extra.foo.**",
            "$user.extra.**",
            "$user.**",
            "$event.**",
            "$user.**.0",
            // types are anywhere
            "$user.$object.**.0",
            "(**.0 | absolutebogus)",
            "(~$object)",
            "($object.** & (~absolutebogus))",
        );

        assert_not_matches!(
            zero_state,
            "$user.extra.foo.1", // direct mismatch in an array
            // deep matches are wild
            "$user.extra.bar.**",
            "$user.**.1",
            "($object | absolutebogus)",
            "($object & absolutebogus)",
            "(~$object.**)",
            "($object | (**.0 & absolutebogus))",
        );

        assert_matches_pii_true!(
            foo_state,
            "($array & $object.*)",
            "(** & $object.*)",
            "**.$array",
        );

        assert_not_matches!(foo_state, "($object & $object.*)",);
    }

    #[test]
    fn test_attachments_matching() {
        let event_state = ProcessingState::new_root(None, None);
        let attachments_state = event_state.enter_static("", None, Some(ValueType::Attachments)); // .
        let txt_state = attachments_state.enter_static("file.txt", None, Some(ValueType::Binary)); // .'file.txt'
        let minidump_state =
            attachments_state.enter_static("file.dmp", None, Some(ValueType::Minidump)); // .'file.dmp'
        // Entered with an empty key below 'file.dmp'.
        let minidump_state_inner = minidump_state.enter_static("", None, Some(ValueType::Binary));

        assert_matches_pii_maybe!(attachments_state, "$attachments",);
        assert_matches_pii_maybe!(txt_state, "$attachments.'file.txt'",);

        assert_matches_pii_true!(txt_state, "$binary",);
        // WAT.  All entire attachments are binary, so why not be able to select them (specific)
        // like this?  Especially since we can select them with wildcard.
        assert_matches_pii_true!(txt_state, "$attachments.$binary",);

        // WAT.  This is not problematic but rather... weird?
        assert_matches_pii_maybe!(txt_state, "$attachments.*",);
        assert_matches_pii_true!(txt_state, "$attachments.**",);

        assert_matches_pii_maybe!(minidump_state, "$minidump",);
        // WAT.  This should not behave differently from plain $minidump
        assert_matches_pii_true!(minidump_state, "$attachments.$minidump",);

        // WAT.  We have the full path to a field here.
        assert_matches_pii_true!(minidump_state_inner, "$attachments.$minidump.$binary",);
    }
}