relay_pii/
selector.rs

1use std::fmt;
2use std::str::FromStr;
3
4use pest::Parser;
5use pest::error::Error;
6use pest::iterators::Pair;
7use relay_event_schema::processor::Path;
8use smallvec::SmallVec;
9
10use relay_event_schema::processor::{Pii, ProcessingState, ValueType};
11
/// Error for invalid PII selectors.
///
/// This is the error type returned when turning a string into a [`SelectorSpec`], both through
/// [`SelectorSpec::parse_non_legacy`] and through the `FromStr` implementation.
#[derive(Debug, thiserror::Error)]
pub enum InvalidSelectorError {
    /// Deep wildcard used more than once.
    ///
    /// A single selector path may contain the deep wildcard (`**`) at most once.
    #[error("deep wildcard used more than once")]
    InvalidDeepWildcard,

    /// Wildcard must be part of a path.
    ///
    /// Raised for a selector path that consists of nothing but a single shallow wildcard (`*`).
    #[error("wildcard must be part of a path")]
    InvalidWildcard,

    /// Invalid selector syntax.
    ///
    /// Carries the (boxed) error reported by the pest parser; its message is used verbatim as the
    /// message of this error.
    #[error("{0}")]
    ParseError(Box<Error<Rule>>),

    /// Invalid index.
    ///
    /// The text of an index path item could not be parsed into a `usize`.
    #[error("invalid index")]
    InvalidIndex,

    /// Unknown value.
    ///
    /// The name following the `$` of a type path item could not be parsed into a [`ValueType`].
    #[error("unknown value")]
    UnknownType,

    /// Internal parser bug: An unexpected item was consumed.
    ///
    /// The first field is the debug name of the grammar rule that was encountered, the second
    /// field describes what was expected instead.
    #[error("parser bug: consumed {0} (expected {1})")]
    UnexpectedToken(String, &'static str),

    /// Internal parsing error, this should never happen and is a bug that needs to be fixed.
    ///
    /// Returned when the parse result does not have the structure the code expects, which means
    /// that the grammar file and the code are out of sync.
    #[error("internal parser error")]
    InternalError,
}
43
// The lint exceptions below apply to the entire module, which contains nothing but the parser
// type and the code derived for it.
//
// NOTE(review): presumably it is the code generated by `pest_derive` that triggers the two clippy
// lints, and `unknown_lints` is there for toolchains that do not know `clippy::empty_docs` yet.
// Confirm before removing any of them.
#[allow(unknown_lints)]
#[allow(clippy::upper_case_acronyms)]
#[allow(clippy::empty_docs)]
mod parser {
    use pest_derive::Parser;

    // Parser for PII selectors. The implementation, including the `Rule` enum re-exported by the
    // parent module, is derived from the grammar in `selector.pest`.
    #[derive(Parser)]
    #[grammar = "selector.pest"]
    pub struct SelectorParser;
}
54
55use self::parser::{Rule, SelectorParser};
56
/// A path component in a composite [`SelectorSpec`].
///
/// The textual form of every variant is defined by the [`fmt::Display`] implementation of this
/// type. Note that the derived ordering follows the declaration order of the variants.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum SelectorPathItem {
    /// The component refers to a value type (written as `$` followed by the type name).
    Type(ValueType),
    /// The component refers to an array index.
    Index(usize),
    /// The component refers to a key in an object.
    ///
    /// Keys are compared ASCII case-insensitively when matching.
    Key(String),
    /// The component is a shallow wildcard (`*`).
    Wildcard,
    /// The component is a deep wildcard (`**`).
    DeepWildcard,
}
71
72impl fmt::Display for SelectorPathItem {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        match *self {
75            SelectorPathItem::Type(ty) => write!(f, "${ty}"),
76            SelectorPathItem::Index(index) => write!(f, "{index}"),
77            SelectorPathItem::Key(ref key) => {
78                if key_needs_quoting(key) {
79                    write!(f, "'{}'", key.replace('\'', "''"))
80                } else {
81                    write!(f, "{key}")
82                }
83            }
84            SelectorPathItem::Wildcard => write!(f, "*"),
85            SelectorPathItem::DeepWildcard => write!(f, "**"),
86        }
87    }
88}
89
90impl SelectorPathItem {
91    /// Determine whether a path item matches the respective processing state.
92    ///
93    /// `pii` is not the same as `state.attrs().pii`, but rather the PII flag of the state we're
94    /// actually trying to match against. `i` is the position of the path item within the path.
95    pub(super) fn matches_state(&self, pii: Pii, i: usize, state: &ProcessingState<'_>) -> bool {
96        match (self, pii) {
97            (_, Pii::False) => false,
98
99            // necessary because of array indices
100            (SelectorPathItem::Wildcard, _) => true,
101
102            // a deep wildcard is too sweeping to be specific
103            (SelectorPathItem::DeepWildcard, Pii::True) => true,
104            (SelectorPathItem::DeepWildcard, Pii::Maybe) => false,
105
106            (SelectorPathItem::Type(ty), Pii::True) => state.value_type().contains(*ty),
107            (SelectorPathItem::Type(ty), Pii::Maybe) => {
108                state.value_type().contains(*ty)
109                    && match ty {
110                        // Basic value types cannot be part of a specific path
111                        ValueType::String
112                        | ValueType::Binary
113                        | ValueType::Number
114                        | ValueType::Boolean
115                        | ValueType::DateTime
116                        | ValueType::Array
117                        | ValueType::Object => false,
118
119                        // Other schema-specific value types can be if they are on the first
120                        // position. This list is explicitly typed out such that the decision
121                        // to add new value types to this list has to be made consciously.
122                        //
123                        // It's easy to change a `false` to `true` later, but a breaking change
124                        // to go the other direction. If you're not sure, return `false` for
125                        // your new value type.
126                        ValueType::Event
127                        | ValueType::Attachments
128                        | ValueType::Replay
129                        | ValueType::Exception
130                        | ValueType::Stacktrace
131                        | ValueType::Frame
132                        | ValueType::Request
133                        | ValueType::User
134                        | ValueType::LogEntry
135                        | ValueType::Message
136                        | ValueType::Thread
137                        | ValueType::Breadcrumb
138                        | ValueType::OurLog
139                        | ValueType::TraceMetric
140                        | ValueType::Span
141                        | ValueType::Minidump
142                        | ValueType::HeapMemory
143                        | ValueType::StackMemory
144                        | ValueType::ClientSdkInfo => i == 0,
145                    }
146            }
147            (SelectorPathItem::Index(idx), _) => state.path().index() == Some(*idx),
148            (SelectorPathItem::Key(key), _) => state
149                .path()
150                .key()
151                .map(|k| k.eq_ignore_ascii_case(key))
152                .unwrap_or(false),
153        }
154    }
155}
156
/// A selector that can match paths of processing states.
///
/// To use a selector, you most likely want to check whether it matches the path of a
/// [`ProcessingState`].  For this you turn the state into a [`Path`] using
/// [`ProcessingState::path`] and call [`SelectorSpec::matches_path`], which will iterate through
/// the path items in the processing state and check whether the selector matches.
///
/// Note that the derived ordering follows the declaration order of the variants.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum SelectorSpec {
    /// A selector that matches if all of its sub-selectors match (`a && b`).
    And(Vec<SelectorSpec>),
    /// A selector that matches if any of its sub-selectors matches (`a || b`).
    Or(Vec<SelectorSpec>),
    /// A selector that matches all paths that do not match the sub-selector (`!a`).
    Not(Box<SelectorSpec>),
    /// A direct path to an item, given as a sequence of path items (`a.b.c`).
    Path(Vec<SelectorPathItem>),
}
174
175impl SelectorSpec {
176    /// Parses a selector from a string without legacy special handling.
177    pub fn parse_non_legacy(s: &str) -> Result<SelectorSpec, InvalidSelectorError> {
178        let mut selector = SelectorParser::parse(Rule::RootSelector, s)
179            .map_err(|e| InvalidSelectorError::ParseError(Box::new(e)))?;
180
181        // Extracts the first `OrSelector` spanning the entire selector from the `RootSelector`.
182        // The `RootSelector` is guaranteed to have exactly one `OrSelector`.
183        let Some(selector) = selector.next().and_then(|s| s.into_inner().next()) else {
184            // Internal parsing error, this should never happen.
185            // If this happens the pest file was modified without changing the code.
186            relay_log::error!(
187                selector = s,
188                "internal error parsing selector {s:?}, this is a bug!"
189            );
190            return Err(InvalidSelectorError::InternalError);
191        };
192
193        handle_selector(selector)
194    }
195
196    /// Checks if a path matches given selector.
197    ///
198    /// This walks both the selector and the path starting at the end and towards the root
199    /// to determine if the selector matches the current path.
200    pub fn matches_path(&self, path: &Path) -> bool {
201        let pii = path.pii();
202        if pii == Pii::False {
203            return false;
204        }
205
206        match *self {
207            SelectorSpec::Path(ref path_items) => {
208                // fastest path: the selector is deeper than the current structure.
209                if path_items.len() > path.depth() {
210                    return false;
211                }
212
213                // fast path: we do not have any deep matches
214                let mut state_iter = path.iter().filter(|state| state.entered_anything());
215                let mut selector_iter = path_items.iter().enumerate().rev();
216                let mut depth_match = false;
217                for state in &mut state_iter {
218                    match selector_iter.next() {
219                        Some((i, path_item)) => {
220                            if !path_item.matches_state(pii, i, state) {
221                                return false;
222                            }
223
224                            if matches!(path_item, SelectorPathItem::DeepWildcard) {
225                                depth_match = true;
226                                break;
227                            }
228                        }
229                        None => break,
230                    }
231                }
232
233                if !depth_match {
234                    return true;
235                }
236
237                // slow path: we collect the remaining states and skip up to the first
238                // match of the selector.
239                let remaining_states = state_iter.collect::<SmallVec<[&ProcessingState<'_>; 16]>>();
240                let mut selector_iter = selector_iter.rev().peekable();
241                let (first_selector_i, first_selector_path) = match selector_iter.next() {
242                    Some(selector_path) => selector_path,
243                    None => return !remaining_states.is_empty(),
244                };
245                let mut path_match_iterator = remaining_states.iter().rev().skip_while(|state| {
246                    !first_selector_path.matches_state(pii, first_selector_i, state)
247                });
248                if path_match_iterator.next().is_none() {
249                    return false;
250                }
251
252                // then we check all remaining items and that nothing is left of the selector
253                path_match_iterator
254                    .zip(&mut selector_iter)
255                    .all(|(state, (i, selector_path))| selector_path.matches_state(pii, i, state))
256                    && selector_iter.next().is_none()
257            }
258            SelectorSpec::And(ref xs) => xs.iter().all(|x| x.matches_path(path)),
259            SelectorSpec::Or(ref xs) => xs.iter().any(|x| x.matches_path(path)),
260            SelectorSpec::Not(ref x) => !x.matches_path(path),
261        }
262    }
263}
264
265impl fmt::Display for SelectorSpec {
266    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
267        match *self {
268            SelectorSpec::And(ref xs) => {
269                for (idx, x) in xs.iter().enumerate() {
270                    if idx > 0 {
271                        write!(f, " && ")?;
272                    }
273
274                    let needs_parens = match *x {
275                        SelectorSpec::And(_) => false,
276                        SelectorSpec::Or(_) => true,
277                        SelectorSpec::Not(_) => false,
278                        SelectorSpec::Path(_) => false,
279                    };
280
281                    if needs_parens {
282                        write!(f, "({x})")?;
283                    } else {
284                        write!(f, "{x}")?;
285                    }
286                }
287            }
288            SelectorSpec::Or(ref xs) => {
289                for (idx, x) in xs.iter().enumerate() {
290                    if idx > 0 {
291                        write!(f, " || ")?;
292                    }
293
294                    // OR has weakest precedence, so everything else binds stronger and does not
295                    // need parens
296
297                    write!(f, "{x}")?;
298                }
299            }
300            SelectorSpec::Not(ref x) => {
301                let needs_parens = match **x {
302                    SelectorSpec::And(_) => true,
303                    SelectorSpec::Or(_) => true,
304                    SelectorSpec::Not(_) => true,
305                    SelectorSpec::Path(_) => false,
306                };
307
308                if needs_parens {
309                    write!(f, "!({x})")?;
310                } else {
311                    write!(f, "!{x}")?;
312                }
313            }
314            SelectorSpec::Path(ref path) => {
315                for (idx, item) in path.iter().enumerate() {
316                    if idx > 0 {
317                        write!(f, ".")?;
318                    }
319                    write!(f, "{item}")?;
320                }
321            }
322        }
323        Ok(())
324    }
325}
326
327impl FromStr for SelectorSpec {
328    type Err = InvalidSelectorError;
329
330    fn from_str(s: &str) -> Result<Self, Self::Err> {
331        // these are temporary legacy selectors
332        match s {
333            "freeform" | "email" | "sensitive" | "text" => {
334                return Ok(SelectorSpec::Path(vec![SelectorPathItem::Type(
335                    ValueType::String,
336                )]));
337            }
338            "databag" | "container" => {
339                return Ok(SelectorSpec::Path(vec![SelectorPathItem::Type(
340                    ValueType::Object,
341                )]));
342            }
343            _ => {}
344        }
345
346        Self::parse_non_legacy(s)
347    }
348}
349
// NOTE(review): presumably this derives the serde implementations of `SelectorSpec` from its
// `Display` and `FromStr` implementations, with "a selector" used as the description of the
// expected value — confirm against the macro definition in `relay_common`.
relay_common::impl_str_serde!(SelectorSpec, "a selector");
351
352impl From<ValueType> for SelectorSpec {
353    fn from(value_type: ValueType) -> Self {
354        SelectorSpec::Path(vec![SelectorPathItem::Type(value_type)])
355    }
356}
357
358fn handle_selector(pair: Pair<Rule>) -> Result<SelectorSpec, InvalidSelectorError> {
359    fn map_multiple_or_inner<F>(
360        pair: Pair<Rule>,
361        f: F,
362    ) -> Result<SelectorSpec, InvalidSelectorError>
363    where
364        F: Fn(Vec<SelectorSpec>) -> SelectorSpec,
365    {
366        let mut iter = pair.into_inner().map(handle_selector).peekable();
367        let first = iter.next().unwrap()?;
368        if iter.peek().is_none() {
369            Ok(first)
370        } else {
371            let mut items = vec![first];
372            for item in iter {
373                items.push(item?);
374            }
375            Ok(f(items))
376        }
377    }
378
379    match pair.as_rule() {
380        Rule::ParenthesisOrPath | Rule::MaybeNotSelector => {
381            handle_selector(pair.into_inner().next().unwrap())
382        }
383        Rule::SelectorPath => {
384            let mut used_deep_wildcard = false;
385            let items: Vec<SelectorPathItem> = pair
386                .into_inner()
387                .map(|item| {
388                    let rv = handle_selector_path_item(item)?;
389                    if rv == SelectorPathItem::DeepWildcard {
390                        if used_deep_wildcard {
391                            return Err(InvalidSelectorError::InvalidDeepWildcard);
392                        } else {
393                            used_deep_wildcard = true;
394                        }
395                    }
396                    Ok(rv)
397                })
398                .collect::<Result<_, _>>()?;
399
400            if matches!(items.as_slice(), [SelectorPathItem::Wildcard]) {
401                return Err(InvalidSelectorError::InvalidWildcard);
402            }
403
404            Ok(SelectorSpec::Path(items))
405        }
406        Rule::AndSelector => map_multiple_or_inner(pair, SelectorSpec::And),
407        Rule::OrSelector => map_multiple_or_inner(pair, SelectorSpec::Or),
408        Rule::NotSelector => Ok(SelectorSpec::Not(Box::new(handle_selector(
409            pair.into_inner().next().unwrap(),
410        )?))),
411        rule => Err(InvalidSelectorError::UnexpectedToken(
412            format!("{rule:?}"),
413            "a selector",
414        )),
415    }
416}
417
418fn handle_selector_path_item(pair: Pair<Rule>) -> Result<SelectorPathItem, InvalidSelectorError> {
419    let pair = pair.into_inner().next().unwrap();
420    match pair.as_rule() {
421        Rule::ObjectType => Ok(SelectorPathItem::Type(
422            pair.as_str()[1..]
423                .parse()
424                .map_err(|_| InvalidSelectorError::UnknownType)?,
425        )),
426        Rule::Wildcard => Ok(SelectorPathItem::Wildcard),
427        Rule::DeepWildcard => Ok(SelectorPathItem::DeepWildcard),
428        Rule::Index => Ok(SelectorPathItem::Index(
429            pair.as_str()
430                .parse()
431                .map_err(|_| InvalidSelectorError::InvalidIndex)?,
432        )),
433        Rule::Key => Ok(SelectorPathItem::Key(handle_key(pair)?)),
434        rule => Err(InvalidSelectorError::UnexpectedToken(
435            format!("{rule:?}"),
436            "a selector path item",
437        )),
438    }
439}
440
441fn handle_key(pair: Pair<Rule>) -> Result<String, InvalidSelectorError> {
442    let pair = pair.into_inner().next().unwrap();
443    match pair.as_rule() {
444        Rule::UnquotedKey => Ok(pair.as_str().to_owned()),
445        Rule::QuotedKey => Ok({
446            let mut key = String::new();
447            for token in pair.into_inner() {
448                key.push_str(token.as_str());
449            }
450            key
451        }),
452        rule => Err(InvalidSelectorError::UnexpectedToken(
453            format!("{rule:?}"),
454            "a key",
455        )),
456    }
457}
458
459fn key_needs_quoting(key: &str) -> bool {
460    SelectorParser::parse(Rule::RootUnquotedKey, key).is_err()
461}
462
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use relay_event_schema::processor::FieldAttrs;

    use super::*;

    /// Parsing a selector and formatting it again must yield the input string.
    #[test]
    fn test_roundtrip() {
        fn check_roundtrip(s: &str) {
            assert_eq!(SelectorSpec::from_str(s).unwrap().to_string(), s);
        }

        check_roundtrip("!(!a)");
        check_roundtrip("!a || !b");
        check_roundtrip("!a && !b");
        check_roundtrip("!(a && !b)");
        check_roundtrip("!(a && b)");
    }

    /// A lone wildcard and a repeated deep wildcard are rejected with their dedicated errors.
    #[test]
    fn test_invalid() {
        assert!(matches!(
            SelectorSpec::from_str("* && foo"),
            Err(InvalidSelectorError::InvalidWildcard)
        ));
        assert!(matches!(
            SelectorSpec::from_str("$frame.**.foo.**"),
            Err(InvalidSelectorError::InvalidDeepWildcard)
        ));
    }

    // Parses `$selector` and asserts that matching it against the path of `$state` yields
    // `$expected`.
    macro_rules! assert_matches_raw {
        ($state:expr, $selector:expr, $expected:expr) => {{
            let selector: SelectorSpec = $selector.parse().unwrap();
            let actual = selector.matches_path(&$state.path());
            assert!(
                actual == $expected,
                "Matched {} against {}, expected {:?}, actually {:?}",
                $selector,
                $state.path(),
                $expected,
                actual
            );
        }};
    }

    // Asserts that all given selectors match `$state` both with `Pii::True` (by delegating to
    // `assert_matches_pii_true!`) and with `Pii::Maybe`. In addition to the individual selectors,
    // their `&&` and `||` combinations are checked, the latter also prefixed with `** || `.
    macro_rules! assert_matches_pii_maybe {
        ($state:expr, $first:expr, $($selector:expr,)*) => {{
            assert_matches_pii_true!($state, $first, $($selector,)*);
            let state = &$state;
            let state = state.enter_nothing(Some(Cow::Owned(FieldAttrs::new().pii(Pii::Maybe))));

            assert_matches_raw!(state, $first, true);
            $(
                assert_matches_raw!(state, $selector, true);
            )*

            let joined = concat!($first, $(" && ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!($first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!("** || ", $first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);
        }}
    }

    // Asserts that all given selectors match `$state` when its PII flag is `Pii::True`. In
    // addition to the individual selectors, their `&&` and `||` combinations are checked, the
    // latter also prefixed with `** || `.
    macro_rules! assert_matches_pii_true {
        ($state:expr, $first:expr, $($selector:expr,)*) => {{
            let state = &$state;
            let state = state.enter_nothing(Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))));

            assert_matches_raw!(state, $first, true);
            $(
                assert_matches_raw!(state, $selector, true);
            )*

            let joined = concat!($first, $(" && ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!($first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);

            let joined = concat!("** || ", $first, $(" || ", $selector,)*);
            assert_matches_raw!(state, &joined, true);
        }}
    }

    // Asserts that none of the given selectors matches `$state`. The state is used as is, no
    // PII attributes are attached by this macro.
    macro_rules! assert_not_matches {
        ($state:expr, $($selector:expr,)*) => {{
            let state = &$state;
            $(
                assert_matches_raw!(state, $selector, false);
            )*
        }}
    }

    #[test]
    fn test_matching() {
        let event_state = ProcessingState::new_root(None, Some(ValueType::Event)); // .
        let user_state = event_state.enter_borrowed("user", None, Some(ValueType::User)); // .user
        let extra_state = user_state.enter_borrowed("extra", None, Some(ValueType::Object)); // .user.extra
        let foo_state = extra_state.enter_borrowed("foo", None, Some(ValueType::Array)); // .user.extra.foo
        let zero_state = foo_state.enter_index(0, None, None); // .user.extra.foo.0

        assert_matches_pii_maybe!(
            extra_state,
            "user.extra",  // this is an exact match to the state
            "$user.extra", // this is a match below a type
            "(** || user.*) && !(foo.bar.baz || a.b.c)",
        );

        assert_matches_pii_true!(
            extra_state,
            // known limitation: double-negations *could* be specific (I'd expect this as a user), but
            // right now we don't support it
            "!(!user.extra)",
            "!(!$user.extra)",
        );

        assert_matches_pii_maybe!(
            foo_state,
            "$user.extra.*", // this is a wildcard match into a type
        );

        assert_matches_pii_maybe!(
            zero_state,
            "$user.extra.foo.*", // a wildcard match into an array
            "$user.extra.foo.0", // a direct match into an array
        );

        assert_matches_pii_true!(
            zero_state,
            // deep matches are wild
            "$user.extra.foo.**",
            "$user.extra.**",
            "$user.**",
            "$event.**",
            "$user.**.0",
            // types are anywhere
            "$user.$object.**.0",
            "(**.0 | absolutebogus)",
            "(~$object)",
            "($object.** & (~absolutebogus))",
            "($object.** & (~absolutebogus))",
        );

        assert_not_matches!(
            zero_state,
            "$user.extra.foo.1", // direct mismatch in an array
            // deep matches are wild
            "$user.extra.bar.**",
            "$user.**.1",
            "($object | absolutebogus)",
            "($object & absolutebogus)",
            "(~$object.**)",
            "($object | (**.0 & absolutebogus))",
        );

        assert_matches_pii_true!(
            foo_state,
            "($array & $object.*)",
            "(** & $object.*)",
            "**.$array",
        );

        assert_not_matches!(foo_state, "($object & $object.*)",);
    }

    #[test]
    fn test_attachments_matching() {
        let event_state = ProcessingState::new_root(None, None);
        let attachments_state = event_state.enter_borrowed("", None, Some(ValueType::Attachments)); // .
        let txt_state = attachments_state.enter_borrowed("file.txt", None, Some(ValueType::Binary)); // .'file.txt'
        let minidump_state =
            attachments_state.enter_borrowed("file.dmp", None, Some(ValueType::Minidump)); // .'file.dmp'
        let minidump_state_inner = minidump_state.enter_borrowed("", None, Some(ValueType::Binary)); // entered with an empty key below .'file.dmp'

        assert_matches_pii_maybe!(attachments_state, "$attachments",);
        assert_matches_pii_maybe!(txt_state, "$attachments.'file.txt'",);

        assert_matches_pii_true!(txt_state, "$binary",);
        // WAT.  All entire attachments are binary, so why not be able to select them (specific)
        // like this?  Especially since we can select them with wildcard.
        assert_matches_pii_true!(txt_state, "$attachments.$binary",);

        // WAT.  This is not problematic but rather... weird?
        assert_matches_pii_maybe!(txt_state, "$attachments.*",);
        assert_matches_pii_true!(txt_state, "$attachments.**",);

        assert_matches_pii_maybe!(minidump_state, "$minidump",);
        // WAT.  This should not behave differently from plain $minidump
        assert_matches_pii_true!(minidump_state, "$attachments.$minidump",);

        // WAT.  We have the full path to a field here.
        assert_matches_pii_true!(minidump_state_inner, "$attachments.$minidump.$binary",);
    }

    #[test]
    fn test_logs_matching() {
        let event_state = ProcessingState::new_root(None, None);
        let log_state = event_state.enter_borrowed("", None, Some(ValueType::OurLog)); // .
        let body_state = log_state.enter_borrowed("body", None, Some(ValueType::String)); // .body
        let attributes_state =
            log_state.enter_borrowed("attributes", None, Some(ValueType::Object)); // .attributes

        assert_matches_pii_maybe!(log_state, "$log",);
        assert_matches_pii_true!(body_state, "$log.body",);
        assert_matches_pii_true!(attributes_state, "$log.attributes",);
    }
}