relay_pii/
processor.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8    self, enum_set, process_value, Chunk, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9    ProcessingState, Processor, ValueType,
10};
11use relay_event_schema::protocol::{
12    AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, PatternType, ReplaceBehavior, ANYTHING_REGEX};
20use crate::utils;
21
/// A processor that performs PII stripping.
///
/// The processor only borrows a [`CompiledPiiConfig`]; it walks the rule applications stored in
/// that config and applies the rules whose selector matches the path being processed.
pub struct PiiProcessor<'a> {
    /// Compiled configuration providing the `(selector, rules)` applications to evaluate.
    compiled_config: &'a CompiledPiiConfig,
}
26
27impl<'a> PiiProcessor<'a> {
28    /// Creates a new processor based on a config.
29    pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
30        // this constructor needs to be cheap... a new PiiProcessor is created for each event. Move
31        // any init logic into CompiledPiiConfig::new.
32        PiiProcessor { compiled_config }
33    }
34
35    fn apply_all_rules(
36        &self,
37        meta: &mut Meta,
38        state: &ProcessingState<'_>,
39        mut value: Option<&mut String>,
40    ) -> ProcessingResult {
41        let pii = state.attrs().pii;
42        if pii == Pii::False {
43            return Ok(());
44        }
45
46        for (selector, rules) in self.compiled_config.applications.iter() {
47            if selector.matches_path(&state.path()) {
48                #[allow(clippy::needless_option_as_deref)]
49                for rule in rules {
50                    let reborrowed_value = value.as_deref_mut();
51                    apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
52                }
53            }
54        }
55
56        Ok(())
57    }
58}
59
60impl Processor for PiiProcessor<'_> {
61    fn before_process<T: ProcessValue>(
62        &mut self,
63        value: Option<&T>,
64        meta: &mut Meta,
65        state: &ProcessingState<'_>,
66    ) -> ProcessingResult {
67        if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
68            // Also apply pii scrubbing to the original value (set by normalization or other processors),
69            // such that we do not leak sensitive data through meta. Deletes `original_value` if an Error
70            // value is returned.
71            if let Some(parent) = state.iter().next() {
72                let path = state.path();
73                let new_state = parent.enter_borrowed(
74                    path.key().unwrap_or(""),
75                    Some(Cow::Borrowed(state.attrs())),
76                    enum_set!(ValueType::String),
77                );
78
79                if self
80                    .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
81                    .is_err()
82                {
83                    // `apply_all_rules` returned `DeleteValueHard` or `DeleteValueSoft`, so delete the original as well.
84                    meta.set_original_value(Option::<String>::None);
85                }
86            }
87        }
88
89        // booleans cannot be PII, and strings are handled in process_string
90        if state.value_type().contains(ValueType::Boolean)
91            || state.value_type().contains(ValueType::String)
92        {
93            return Ok(());
94        }
95
96        if value.is_none() {
97            return Ok(());
98        }
99
100        // apply rules based on key/path
101        self.apply_all_rules(meta, state, None)
102    }
103
104    fn process_array<T>(
105        &mut self,
106        array: &mut Array<T>,
107        _meta: &mut Meta,
108        state: &ProcessingState<'_>,
109    ) -> ProcessingResult
110    where
111        T: ProcessValue,
112    {
113        if is_pairlist(array) {
114            for annotated in array {
115                let mut mapped = mem::take(annotated).map_value(T::into_value);
116
117                if let Some(Value::Array(ref mut pair)) = mapped.value_mut() {
118                    let mut value = mem::take(&mut pair[1]);
119                    let value_type = ValueType::for_field(&value);
120
121                    if let Some(key_name) = &pair[0].as_str() {
122                        // We enter the key of the first element of the array, since we treat it
123                        // as a pair.
124                        let key_state =
125                            state.enter_borrowed(key_name, state.inner_attrs(), value_type);
126                        // We process the value with a state that "simulates" the first value of the
127                        // array as if it was the key of a dictionary.
128                        process_value(&mut value, self, &key_state)?;
129                    }
130
131                    // Put value back into pair.
132                    pair[1] = value;
133                }
134
135                // Put pair back into array.
136                *annotated = T::from_value(mapped);
137            }
138
139            Ok(())
140        } else {
141            // If we didn't find a pairlist, we can process child values as normal.
142            array.process_child_values(self, state)
143        }
144    }
145
146    fn process_string(
147        &mut self,
148        value: &mut String,
149        meta: &mut Meta,
150        state: &ProcessingState<'_>,
151    ) -> ProcessingResult {
152        if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
153            return Ok(());
154        }
155
156        // same as before_process. duplicated here because we can only check for "true",
157        // "false" etc in process_string.
158        self.apply_all_rules(meta, state, Some(value))
159    }
160
161    fn process_native_image_path(
162        &mut self,
163        NativeImagePath(ref mut value): &mut NativeImagePath,
164        meta: &mut Meta,
165        state: &ProcessingState<'_>,
166    ) -> ProcessingResult {
167        // In NativeImagePath we must not strip the file's basename because that would break
168        // processing.
169        //
170        // We pop the basename from the end of the string, call process_string and push the
171        // basename again.
172        //
173        // The ranges in Meta should still be right as long as we only pop/push from the end of the
174        // string. If we decide that we need to preserve anything other than suffixes all PII
175        // tooltips/annotations are potentially wrong.
176
177        if let Some(index) = value.rfind(['/', '\\']) {
178            let basename = value.split_off(index);
179            match self.process_string(value, meta, state) {
180                Ok(()) => value.push_str(&basename),
181                Err(ProcessingAction::DeleteValueHard) | Err(ProcessingAction::DeleteValueSoft) => {
182                    basename[1..].clone_into(value);
183                }
184                Err(ProcessingAction::InvalidTransaction(x)) => {
185                    return Err(ProcessingAction::InvalidTransaction(x))
186                }
187            }
188        }
189
190        Ok(())
191    }
192
193    fn process_pairlist<T: ProcessValue + AsPair>(
194        &mut self,
195        value: &mut PairList<T>,
196        _meta: &mut Meta,
197        state: &ProcessingState,
198    ) -> ProcessingResult {
199        utils::process_pairlist(self, value, state)
200    }
201
202    fn process_user(
203        &mut self,
204        user: &mut User,
205        _meta: &mut Meta,
206        state: &ProcessingState<'_>,
207    ) -> ProcessingResult {
208        let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
209
210        // Recurse into the user and does PII processing on fields.
211        user.process_child_values(self, state)?;
212
213        let has_other_fields = user.id.value().is_some()
214            || user.username.value().is_some()
215            || user.email.value().is_some();
216
217        let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
218
219        // If the IP address has become invalid as part of PII processing, we move it into the user
220        // ID. That ensures people can do IP hashing and still have a correct users-affected count.
221        //
222        // Right now both Snuba and EventUser discard unparseable IPs for indexing, and we assume
223        // we want to keep it that way.
224        //
225        // If there are any other fields set that take priority over the IP for uniquely
226        // identifying a user (has_other_fields), we do not want to do anything. The value will be
227        // wiped out in renormalization anyway.
228        if ip_was_valid && !has_other_fields && !ip_is_still_valid {
229            user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
230            user.ip_address.meta_mut().add_remark(Remark::new(
231                RemarkType::Removed,
232                "pii:ip_address".to_string(),
233            ));
234        }
235
236        Ok(())
237    }
238
239    // Replay PII processor entry point.
240    fn process_replay(
241        &mut self,
242        replay: &mut Replay,
243        _meta: &mut Meta,
244        state: &ProcessingState<'_>,
245    ) -> ProcessingResult {
246        replay.process_child_values(self, state)?;
247        Ok(())
248    }
249}
250
/// Visitor state used to detect whether a value is a `[key, value]` pair array.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited top-level array has exactly two elements.
    is_pair: bool,
    /// Set when the first element of that array is a string.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Tells whether the visited data was recognized as a two-element array whose first
    /// element (the key) is a string.
    fn is_pair_array(&self) -> bool {
        let Self {
            is_pair,
            has_string_key,
        } = self;

        *is_pair && *has_string_key
    }
}
264
265impl Processor for PairListProcessor {
266    fn process_array<T>(
267        &mut self,
268        value: &mut Array<T>,
269        _meta: &mut Meta,
270        state: &ProcessingState<'_>,
271    ) -> ProcessingResult
272    where
273        T: ProcessValue,
274    {
275        self.is_pair = state.depth() == 0 && value.len() == 2;
276        if self.is_pair {
277            let key_type = ValueType::for_field(&value[0]);
278            process_value(
279                &mut value[0],
280                self,
281                &state.enter_index(0, state.inner_attrs(), key_type),
282            )?;
283        }
284
285        Ok(())
286    }
287
288    fn process_string(
289        &mut self,
290        _value: &mut String,
291        _meta: &mut Meta,
292        state: &ProcessingState<'_>,
293    ) -> ProcessingResult where {
294        if state.depth() == 1 && state.path().index() == Some(0) {
295            self.has_string_key = true;
296        }
297
298        Ok(())
299    }
300}
301
302fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
303    for element in array.iter_mut() {
304        let mut visitor = PairListProcessor::default();
305        process_value(element, &mut visitor, ProcessingState::root()).ok();
306        if !visitor.is_pair_array() {
307            return false;
308        }
309    }
310
311    !array.is_empty()
312}
313
314/// Scrubs GraphQL variables from the event.
315pub fn scrub_graphql(event: &mut Event) {
316    let mut keys: BTreeSet<&str> = BTreeSet::new();
317
318    let mut is_graphql = false;
319
320    // Collect the variables keys and scrub them out.
321    if let Some(request) = event.request.value_mut() {
322        if let Some(Value::Object(data)) = request.data.value_mut() {
323            if let Some(api_target) = request.api_target.value() {
324                if api_target.eq_ignore_ascii_case("graphql") {
325                    is_graphql = true;
326                }
327            }
328
329            if is_graphql {
330                if let Some(Annotated(Some(Value::Object(variables)), _)) =
331                    data.get_mut("variables")
332                {
333                    for (key, value) in variables.iter_mut() {
334                        keys.insert(key);
335                        value.set_value(Some(Value::String("[Filtered]".to_string())));
336                    }
337                }
338            }
339        }
340    }
341
342    if !is_graphql {
343        return;
344    }
345
346    // Scrub PII from the data object if they match the variables keys.
347    if let Some(contexts) = event.contexts.value_mut() {
348        if let Some(response) = contexts.get_mut::<ResponseContext>() {
349            if let Some(Value::Object(data)) = response.data.value_mut() {
350                if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
351                {
352                    if !keys.is_empty() {
353                        scrub_graphql_data(&keys, graphql_data);
354                    } else {
355                        // If we don't have the variable keys, we scrub the whole data object
356                        // because the query or mutation weren't parameterized.
357                        data.remove("data");
358                    }
359                }
360            }
361        }
362    }
363}
364
365/// Scrubs values from the data object to `[Filtered]`.
366fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
367    for (key, value) in data.iter_mut() {
368        match value.value_mut() {
369            Some(Value::Object(item_data)) => {
370                scrub_graphql_data(keys, item_data);
371            }
372            _ => {
373                if keys.contains(key.as_str()) {
374                    value.set_value(Some(Value::String("[Filtered]".to_string())));
375                }
376            }
377        }
378    }
379}
380
381fn apply_rule_to_value(
382    meta: &mut Meta,
383    rule: &RuleRef,
384    key: Option<&str>,
385    mut value: Option<&mut String>,
386) -> ProcessingResult {
387    // The rule might specify to remove or to redact. If redaction is chosen, we need to
388    // chunk up the value, otherwise we need to simply mark the value for deletion.
389    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
390
391    // In case the value is not a string (but a container, bool or number) and the rule matches on
392    // anything, we can only remove the value (not replace, hash, etc).
393    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
394        // The value is a container, @anything on a container can do nothing but delete.
395        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
396        return Err(ProcessingAction::DeleteValueHard);
397    }
398
399    macro_rules! apply_regex {
400        ($regex:expr, $replace_behavior:expr) => {
401            if let Some(ref mut value) = value {
402                processor::process_chunked_value(value, meta, |chunks| {
403                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
404                });
405            }
406        };
407    }
408
409    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
410        match pattern_type {
411            PatternType::KeyValue => {
412                if regex.is_match(key.unwrap_or("")) {
413                    if value.is_some() && should_redact_chunks {
414                        // If we're given a string value here, redact the value like we would with
415                        // @anything.
416                        apply_regex!(&ANYTHING_REGEX, replace_behavior);
417                    } else {
418                        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
419                        return Err(ProcessingAction::DeleteValueHard);
420                    }
421                } else {
422                    // If we did not redact using the key, we will redact the entire value if the key
423                    // appears in it.
424                    apply_regex!(regex, replace_behavior);
425                }
426            }
427            PatternType::Value => {
428                apply_regex!(regex, replace_behavior);
429            }
430        }
431    }
432
433    Ok(())
434}
435
436fn apply_regex_to_chunks<'a>(
437    chunks: Vec<Chunk<'a>>,
438    rule: &RuleRef,
439    regex: &Regex,
440    replace_behavior: ReplaceBehavior,
441) -> Vec<Chunk<'a>> {
442    // NB: This function allocates the entire string and all chunks a second time. This means it
443    // cannot reuse chunks and reallocates them. Ideally, we would be able to run the regex directly
444    // on the chunks, but the `regex` crate does not support that.
445
446    let mut search_string = String::new();
447    let mut has_text = false;
448    for chunk in &chunks {
449        match chunk {
450            Chunk::Text { text } => {
451                has_text = true;
452                search_string.push_str(&text.replace('\x00', ""));
453            }
454            Chunk::Redaction { .. } => search_string.push('\x00'),
455        }
456    }
457
458    if !has_text {
459        // Nothing to replace.
460        return chunks;
461    }
462
463    // Early exit if this regex does not match and return the original chunks.
464    let mut captures_iter = regex.captures_iter(&search_string).peekable();
465    if captures_iter.peek().is_none() {
466        return chunks;
467    }
468
469    let mut replacement_chunks = vec![];
470    for chunk in chunks {
471        if let Chunk::Redaction { .. } = chunk {
472            replacement_chunks.push(chunk);
473        }
474    }
475    replacement_chunks.reverse();
476
477    fn process_text<'a>(
478        text: &str,
479        rv: &mut Vec<Chunk<'a>>,
480        replacement_chunks: &mut Vec<Chunk<'a>>,
481    ) {
482        if text.is_empty() {
483            return;
484        }
485
486        static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
487        let regex = NULL_SPLIT_RE.get_or_init(|| {
488            #[allow(clippy::trivial_regex)]
489            Regex::new("\x00").unwrap()
490        });
491
492        let mut pos = 0;
493        for piece in regex.find_iter(text) {
494            rv.push(Chunk::Text {
495                text: Cow::Owned(text[pos..piece.start()].to_string()),
496            });
497            rv.push(replacement_chunks.pop().unwrap());
498            pos = piece.end();
499        }
500
501        rv.push(Chunk::Text {
502            text: Cow::Owned(text[pos..].to_string()),
503        });
504    }
505
506    let mut pos = 0;
507    let mut rv = Vec::with_capacity(replacement_chunks.len());
508
509    match replace_behavior {
510        ReplaceBehavior::Groups(ref groups) => {
511            for m in captures_iter {
512                for (idx, g) in m.iter().enumerate() {
513                    if let Some(g) = g {
514                        if groups.contains(&(idx as u8)) {
515                            process_text(
516                                &search_string[pos..g.start()],
517                                &mut rv,
518                                &mut replacement_chunks,
519                            );
520                            insert_replacement_chunks(rule, g.as_str(), &mut rv);
521                            pos = g.end();
522                        }
523                    }
524                }
525            }
526            process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
527            debug_assert!(replacement_chunks.is_empty());
528        }
529        ReplaceBehavior::Value => {
530            // We only want to replace a string value, and the replacement chunk for that is
531            // inserted by insert_replacement_chunks. Adding chunks from replacement_chunks
532            // results in the incorrect behavior of a total of more chunks than the input.
533            insert_replacement_chunks(rule, &search_string, &mut rv);
534        }
535    }
536    rv
537}
538
539fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
540    match &rule.redaction {
541        Redaction::Default | Redaction::Remove => {
542            output.push(Chunk::Redaction {
543                text: Cow::Borrowed(""),
544                rule_id: Cow::Owned(rule.origin.to_string()),
545                ty: RemarkType::Removed,
546            });
547        }
548        Redaction::Mask => {
549            let buf = vec!['*'; text.chars().count()];
550
551            output.push(Chunk::Redaction {
552                ty: RemarkType::Masked,
553                rule_id: Cow::Owned(rule.origin.to_string()),
554                text: buf.into_iter().collect(),
555            })
556        }
557        Redaction::Hash => {
558            output.push(Chunk::Redaction {
559                ty: RemarkType::Pseudonymized,
560                rule_id: Cow::Owned(rule.origin.to_string()),
561                text: Cow::Owned(utils::hash_value(text.as_bytes())),
562            });
563        }
564        Redaction::Replace(replace) => {
565            output.push(Chunk::Redaction {
566                ty: RemarkType::Substituted,
567                rule_id: Cow::Owned(rule.origin.to_string()),
568                text: Cow::Owned(replace.text.clone()),
569            });
570        }
571        Redaction::Other => relay_log::warn!("Incoming redaction is not supported"),
572    }
573}
574
575#[cfg(test)]
576mod tests {
577    use insta::{allow_duplicates, assert_debug_snapshot};
578    use relay_event_schema::processor::process_value;
579    use relay_event_schema::protocol::{
580        Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
581        NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
582    };
583    use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object};
584    use serde_json::json;
585
586    use super::*;
587    use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
588
589    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
590        use crate::convert::to_pii_config as to_pii_config_impl;
591        let rv = to_pii_config_impl(datascrubbing_config).unwrap();
592        if let Some(ref config) = rv {
593            let roundtrip: PiiConfig =
594                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
595            assert_eq!(&roundtrip, config);
596        }
597        rv
598    }
599
    /// Runs the default data scrubbing rules (including IP scrubbing) over an event whose
    /// `user` and `hpkp` fields contain IP addresses and a credit card number, and snapshots
    /// the processed event.
    #[test]
    fn test_scrub_original_value() {
        let mut data = Event::from_value(
            json!({
                "user": {
                    "username": "hey  man 73.133.27.120", // should be stripped despite not being "known ip field"
                    "ip_address": "is this an ip address? 73.133.27.120", //  <--------
                },
                "hpkp":"invalid data my ip address is  74.133.27.120 and my credit card number is  4571234567890111 ",
            })
            .into(),
        );

        let scrubbing_config = DataScrubbingConfig {
            scrub_data: true,
            scrub_ip_addresses: true,
            scrub_defaults: true,
            ..Default::default()
        };

        let pii_config = to_pii_config(&scrubbing_config).unwrap();
        let mut pii_processor = PiiProcessor::new(pii_config.compiled());

        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();

        // The snapshot is taken of the whole annotated event, meta included.
        assert_debug_snapshot!(&data);
    }
627
    /// Runs the default data scrubbing rules (including IP scrubbing) over a user that has an
    /// `ip_address` and a `sentry_user` derived from that IP, and snapshots the result.
    #[test]
    fn test_sentry_user() {
        let mut data = Event::from_value(
            json!({
                "user": {
                    "ip_address": "73.133.27.120",
                    "sentry_user": "ip:73.133.27.120",
                },
            })
            .into(),
        );

        let scrubbing_config = DataScrubbingConfig {
            scrub_data: true,
            scrub_ip_addresses: true,
            scrub_defaults: true,
            ..Default::default()
        };

        let pii_config = to_pii_config(&scrubbing_config).unwrap();
        let mut pii_processor = PiiProcessor::new(pii_config.compiled());

        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();

        assert_debug_snapshot!(&data);
    }
654
    /// Applies `@ip` to all strings and a custom `redact_pair` rule (keys matching cookie or
    /// secret key) to everything below objects, then snapshots the processed event.
    #[test]
    fn test_basic_stripping() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "rules": {
                    "remove_bad_headers": {
                        "type": "redact_pair",
                        "keyPattern": "(?i)cookie|secret[-_]?key"
                    }
                },
                "applications": {
                    "$string": ["@ip"],
                    "$object.**": ["remove_bad_headers"]
                }
            }
            "#,
        )
        .unwrap();

        // The event carries a sensitive key in the request env, a cookie header, and IP
        // addresses in a header and a tag.
        let mut event = Annotated::new(Event {
            logentry: Annotated::new(LogEntry {
                formatted: Annotated::new("Hello world!".to_string().into()),
                ..Default::default()
            }),
            request: Annotated::new(Request {
                env: {
                    let mut rv = Object::new();
                    rv.insert(
                        "SECRET_KEY".to_string(),
                        Annotated::new(Value::String("134141231231231231231312".into())),
                    );
                    Annotated::new(rv)
                },
                headers: {
                    let rv = vec![
                        Annotated::new((
                            Annotated::new("Cookie".to_string().into()),
                            Annotated::new("super secret".to_string().into()),
                        )),
                        Annotated::new((
                            Annotated::new("X-Forwarded-For".to_string().into()),
                            Annotated::new("127.0.0.1".to_string().into()),
                        )),
                    ];
                    Annotated::new(Headers(PairList(rv)))
                },
                ..Default::default()
            }),
            tags: Annotated::new(Tags(
                vec![Annotated::new(TagEntry(
                    Annotated::new("forwarded_for".to_string()),
                    Annotated::new("127.0.0.1".to_string()),
                ))]
                .into(),
            )),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
718
    /// Applies `@anything` to the `$object` selector and snapshots an event whose `extra`
    /// object holds a single string entry.
    #[test]
    fn test_redact_containers() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$object": ["@anything"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "foo".to_string(),
                    Annotated::new(ExtraValue(Value::String("bar".to_string()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
748
    /// Applies a custom `pattern` rule (pattern `foo`, redaction method `replace` with text
    /// `asd`) to all strings and snapshots an event whose extra value is `foobar`.
    #[test]
    fn test_redact_custom_pattern() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["myrule"]
                },
                "rules": {
                    "myrule": {
                        "type": "pattern",
                        "pattern": "foo",
                        "redaction": {
                            "method": "replace",
                            "text": "asd"
                        }
                    }
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_string(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
788
    /// Applies `@anything:remove` to every path (`**`) and snapshots an event that only has a
    /// single `extra` entry set.
    #[test]
    fn test_no_field_upsert() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "**": ["@anything:remove"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_string(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
818
    /// Applies `@anything:hash` to the `$string` selector and snapshots an event with a single
    /// string value in `extra`.
    #[test]
    fn test_anything_hash_on_string() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["@anything:hash"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_string(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
848
    /// Applies `@anything:hash` to the `$object` selector (a container rather than a string)
    /// and snapshots an event with a single string value in `extra`.
    #[test]
    fn test_anything_hash_on_container() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$object": ["@anything:hash"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_string(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
878
879    #[test]
880    fn test_ignore_user_agent_ip_scrubbing() {
881        let mut data = Event::from_value(
882            json!({
883                "request": {
884                    "headers": [
885                        ["User-Agent", "127.0.0.1"],
886                        ["X-Client-Ip", "10.0.0.1"]
887                    ]
888                },
889            })
890            .into(),
891        );
892
893        let scrubbing_config = DataScrubbingConfig {
894            scrub_data: true,
895            scrub_ip_addresses: true,
896            scrub_defaults: true,
897            ..Default::default()
898        };
899
900        let pii_config = to_pii_config(&scrubbing_config).unwrap();
901        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
902
903        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
904
905        assert_annotated_snapshot!(&data);
906    }
907
908    #[test]
909    fn test_remove_debugmeta_path() {
910        let config = serde_json::from_str::<PiiConfig>(
911            r#"
912            {
913                "applications": {
914                    "debug_meta.images.*.code_file": ["@anything:remove"],
915                    "debug_meta.images.*.debug_file": ["@anything:remove"]
916                }
917            }
918            "#,
919        )
920        .unwrap();
921
922        let mut event = Annotated::new(Event {
923            debug_meta: Annotated::new(DebugMeta {
924                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
925                    NativeDebugImage {
926                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
927                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
928                        debug_id: Annotated::new(
929                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
930                        ),
931                        debug_file: Annotated::new("wntdll.pdb".into()),
932                        debug_checksum: Annotated::empty(),
933                        arch: Annotated::new("arm64".to_string()),
934                        image_addr: Annotated::new(Addr(0)),
935                        image_size: Annotated::new(4096),
936                        image_vmaddr: Annotated::new(Addr(32768)),
937                        other: {
938                            let mut map = Object::new();
939                            map.insert(
940                                "other".to_string(),
941                                Annotated::new(Value::String("value".to_string())),
942                            );
943                            map
944                        },
945                    },
946                )))]),
947                ..Default::default()
948            }),
949            ..Default::default()
950        });
951
952        let mut processor = PiiProcessor::new(config.compiled());
953        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
954        assert_annotated_snapshot!(event);
955    }
956
957    #[test]
958    fn test_replace_debugmeta_path() {
959        let config = serde_json::from_str::<PiiConfig>(
960            r#"
961            {
962                "applications": {
963                    "debug_meta.images.*.code_file": ["@anything:replace"],
964                    "debug_meta.images.*.debug_file": ["@anything:replace"]
965                }
966            }
967            "#,
968        )
969        .unwrap();
970
971        let mut event = Annotated::new(Event {
972            debug_meta: Annotated::new(DebugMeta {
973                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
974                    NativeDebugImage {
975                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
976                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
977                        debug_id: Annotated::new(
978                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
979                        ),
980                        debug_file: Annotated::new("wntdll.pdb".into()),
981                        debug_checksum: Annotated::empty(),
982                        arch: Annotated::new("arm64".to_string()),
983                        image_addr: Annotated::new(Addr(0)),
984                        image_size: Annotated::new(4096),
985                        image_vmaddr: Annotated::new(Addr(32768)),
986                        other: {
987                            let mut map = Object::new();
988                            map.insert(
989                                "other".to_string(),
990                                Annotated::new(Value::String("value".to_string())),
991                            );
992                            map
993                        },
994                    },
995                )))]),
996                ..Default::default()
997            }),
998            ..Default::default()
999        });
1000
1001        let mut processor = PiiProcessor::new(config.compiled());
1002        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1003        assert_annotated_snapshot!(event);
1004    }
1005
1006    #[test]
1007    fn test_hash_debugmeta_path() {
1008        let config = serde_json::from_str::<PiiConfig>(
1009            r#"
1010            {
1011                "applications": {
1012                    "debug_meta.images.*.code_file": ["@anything:hash"],
1013                    "debug_meta.images.*.debug_file": ["@anything:hash"]
1014                }
1015            }
1016            "#,
1017        )
1018        .unwrap();
1019
1020        let mut event = Annotated::new(Event {
1021            debug_meta: Annotated::new(DebugMeta {
1022                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1023                    NativeDebugImage {
1024                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1025                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1026                        debug_id: Annotated::new(
1027                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1028                        ),
1029                        debug_file: Annotated::new("wntdll.pdb".into()),
1030                        debug_checksum: Annotated::empty(),
1031                        arch: Annotated::new("arm64".to_string()),
1032                        image_addr: Annotated::new(Addr(0)),
1033                        image_size: Annotated::new(4096),
1034                        image_vmaddr: Annotated::new(Addr(32768)),
1035                        other: {
1036                            let mut map = Object::new();
1037                            map.insert(
1038                                "other".to_string(),
1039                                Annotated::new(Value::String("value".to_string())),
1040                            );
1041                            map
1042                        },
1043                    },
1044                )))]),
1045                ..Default::default()
1046            }),
1047            ..Default::default()
1048        });
1049
1050        let mut processor = PiiProcessor::new(config.compiled());
1051        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1052        assert_annotated_snapshot!(event);
1053    }
1054
1055    #[test]
1056    fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1057        let config = serde_json::from_str::<PiiConfig>(
1058            r#"
1059            {
1060                "applications": {
1061                    "$string": ["@anything:remove"],
1062                    "**": ["@anything:remove"],
1063                    "debug_meta.**": ["@anything:remove"],
1064                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1065                }
1066            }
1067            "#,
1068        )
1069        .unwrap();
1070
1071        let mut event = Annotated::new(Event {
1072            debug_meta: Annotated::new(DebugMeta {
1073                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1074                    NativeDebugImage {
1075                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1076                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1077                        debug_id: Annotated::new(
1078                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1079                        ),
1080                        debug_file: Annotated::new("wntdll.pdb".into()),
1081                        debug_checksum: Annotated::empty(),
1082                        arch: Annotated::new("arm64".to_string()),
1083                        image_addr: Annotated::new(Addr(0)),
1084                        image_size: Annotated::new(4096),
1085                        image_vmaddr: Annotated::new(Addr(32768)),
1086                        other: {
1087                            let mut map = Object::new();
1088                            map.insert(
1089                                "other".to_string(),
1090                                Annotated::new(Value::String("value".to_string())),
1091                            );
1092                            map
1093                        },
1094                    },
1095                )))]),
1096                ..Default::default()
1097            }),
1098            ..Default::default()
1099        });
1100
1101        let mut processor = PiiProcessor::new(config.compiled());
1102        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1103        assert_annotated_snapshot!(event);
1104    }
1105
1106    #[test]
1107    fn test_quoted_keys() {
1108        let config = serde_json::from_str::<PiiConfig>(
1109            r#"
1110            {
1111                "applications": {
1112                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1113                }
1114            }
1115            "#,
1116        )
1117        .unwrap();
1118
1119        let mut event = Annotated::new(Event {
1120            extra: {
1121                let mut map = Object::new();
1122                map.insert(
1123                    "do not ,./<>?!@#$%^&*())'ßtrip'".to_string(),
1124                    Annotated::new(ExtraValue(Value::String("foo".to_string()))),
1125                );
1126                map.insert(
1127                    "special ,./<>?!@#$%^&*())'gärbage'".to_string(),
1128                    Annotated::new(ExtraValue(Value::String("bar".to_string()))),
1129                );
1130                Annotated::new(map)
1131            },
1132            ..Default::default()
1133        });
1134
1135        let mut processor = PiiProcessor::new(config.compiled());
1136        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1137        assert_annotated_snapshot!(event);
1138    }
1139
1140    #[test]
1141    fn test_logentry_value_types() {
1142        // Assert that logentry.formatted is addressable as $string, $message and $logentry.formatted
1143        for formatted_selector in &[
1144            "$logentry.formatted",
1145            "$message",
1146            "$logentry.formatted && $message",
1147            "$string",
1148        ] {
1149            let config = serde_json::from_str::<PiiConfig>(&format!(
1150                r##"
1151                {{
1152                    "applications": {{
1153                        "{formatted_selector}": ["@anything:remove"]
1154                    }}
1155                }}
1156                "##
1157            ))
1158            .unwrap();
1159
1160            let mut event = Annotated::new(Event {
1161                logentry: Annotated::new(LogEntry {
1162                    formatted: Annotated::new("Hello world!".to_string().into()),
1163                    ..Default::default()
1164                }),
1165                ..Default::default()
1166            });
1167
1168            let mut processor = PiiProcessor::new(config.compiled());
1169            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1170
1171            assert!(event
1172                .value()
1173                .unwrap()
1174                .logentry
1175                .value()
1176                .unwrap()
1177                .formatted
1178                .value()
1179                .is_none());
1180        }
1181    }
1182
1183    #[test]
1184    fn test_ip_address_hashing() {
1185        let config = serde_json::from_str::<PiiConfig>(
1186            r#"
1187            {
1188                "applications": {
1189                    "$user.ip_address": ["@ip:hash"]
1190                }
1191            }
1192            "#,
1193        )
1194        .unwrap();
1195
1196        let mut event = Annotated::new(Event {
1197            user: Annotated::new(User {
1198                ip_address: Annotated::new(IpAddr("127.0.0.1".to_string())),
1199                ..Default::default()
1200            }),
1201            ..Default::default()
1202        });
1203
1204        let mut processor = PiiProcessor::new(config.compiled());
1205        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1206
1207        let user = event.value().unwrap().user.value().unwrap();
1208
1209        assert!(user.ip_address.value().is_none());
1210
1211        assert_eq!(
1212            user.id.value().unwrap().as_str(),
1213            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1214        );
1215    }
1216
1217    #[test]
1218    fn test_ip_address_hashing_does_not_overwrite_id() {
1219        let config = serde_json::from_str::<PiiConfig>(
1220            r#"
1221            {
1222                "applications": {
1223                    "$user.ip_address": ["@ip:hash"]
1224                }
1225            }
1226            "#,
1227        )
1228        .unwrap();
1229
1230        let mut event = Annotated::new(Event {
1231            user: Annotated::new(User {
1232                id: Annotated::new("123".to_string().into()),
1233                ip_address: Annotated::new(IpAddr("127.0.0.1".to_string())),
1234                ..Default::default()
1235            }),
1236            ..Default::default()
1237        });
1238
1239        let mut processor = PiiProcessor::new(config.compiled());
1240        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1241
1242        let user = event.value().unwrap().user.value().unwrap();
1243
1244        // This will get wiped out in renormalization though
1245        assert_eq!(
1246            user.ip_address.value().unwrap().as_str(),
1247            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1248        );
1249
1250        assert_eq!(user.id.value().unwrap().as_str(), "123");
1251    }
1252
1253    #[test]
1254    fn test_replace_replaced_text() {
1255        let chunks = vec![Chunk::Redaction {
1256            text: "[ip]".into(),
1257            rule_id: "@ip".into(),
1258            ty: RemarkType::Substituted,
1259        }];
1260        let rule = RuleRef {
1261            id: "@ip:replace".into(),
1262            origin: "@ip".into(),
1263            ty: RuleType::Ip,
1264            redaction: Redaction::Replace(ReplaceRedaction {
1265                text: "[ip]".into(),
1266            }),
1267        };
1268        let res = apply_regex_to_chunks(
1269            chunks.clone(),
1270            &rule,
1271            &Regex::new(r#".*"#).unwrap(),
1272            ReplaceBehavior::Value,
1273        );
1274        assert_eq!(chunks, res);
1275    }
1276
1277    #[test]
1278    fn test_replace_replaced_text_anything() {
1279        let chunks = vec![Chunk::Redaction {
1280            text: "[Filtered]".into(),
1281            rule_id: "@password:filter".into(),
1282            ty: RemarkType::Substituted,
1283        }];
1284        let rule = RuleRef {
1285            id: "@anything:filter".into(),
1286            origin: "@anything:filter".into(),
1287            ty: RuleType::Anything,
1288            redaction: Redaction::Replace(ReplaceRedaction {
1289                text: "[Filtered]".into(),
1290            }),
1291        };
1292        let res = apply_regex_to_chunks(
1293            chunks.clone(),
1294            &rule,
1295            &Regex::new(r#".*"#).unwrap(),
1296            ReplaceBehavior::Groups(smallvec::smallvec![0]),
1297        );
1298        assert_eq!(chunks, res);
1299    }
1300
1301    #[test]
1302    fn test_trace_route_params_scrubbed() {
1303        let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1304            r#"
1305            {
1306                "type": "trace",
1307                "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1308                "span_id": "fa90fdead5f74052",
1309                "data": {
1310                    "previousRoute": {
1311                        "params": {
1312                            "password": "test"
1313                        }
1314                    }
1315                }
1316            }
1317            "#,
1318        )
1319        .unwrap();
1320
1321        let ds_config = DataScrubbingConfig {
1322            scrub_data: true,
1323            scrub_defaults: true,
1324            ..Default::default()
1325        };
1326        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1327        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1328
1329        process_value(
1330            &mut trace_context,
1331            &mut pii_processor,
1332            ProcessingState::root(),
1333        )
1334        .unwrap();
1335        assert_annotated_snapshot!(trace_context);
1336    }
1337
1338    #[test]
1339    fn test_scrub_span_data_http_not_scrubbed() {
1340        let mut span: Annotated<Span> = Annotated::from_json(
1341            r#"{
1342                "data": {
1343                    "http": {
1344                        "query": "dance=true"
1345                    }
1346                }
1347            }"#,
1348        )
1349        .unwrap();
1350
1351        let ds_config = DataScrubbingConfig {
1352            scrub_data: true,
1353            scrub_defaults: true,
1354            ..Default::default()
1355        };
1356        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1357        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1358
1359        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1360        assert_annotated_snapshot!(span);
1361    }
1362
1363    #[test]
1364    fn test_scrub_span_data_http_strings_are_scrubbed() {
1365        let mut span: Annotated<Span> = Annotated::from_json(
1366            r#"{
1367                "data": {
1368                    "http": {
1369                        "query": "ccnumber=5105105105105100&process_id=123",
1370                        "fragment": "ccnumber=5105105105105100,process_id=123"
1371                    }
1372                }
1373            }"#,
1374        )
1375        .unwrap();
1376
1377        let ds_config = DataScrubbingConfig {
1378            scrub_data: true,
1379            scrub_defaults: true,
1380            ..Default::default()
1381        };
1382        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1383        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1384
1385        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1386        assert_annotated_snapshot!(span);
1387    }
1388
1389    #[test]
1390    fn test_scrub_span_data_http_objects_are_scrubbed() {
1391        let mut span: Annotated<Span> = Annotated::from_json(
1392            r#"{
1393                "data": {
1394                    "http": {
1395                        "query": {
1396                            "ccnumber": "5105105105105100",
1397                            "process_id": "123"
1398                        },
1399                        "fragment": {
1400                            "ccnumber": "5105105105105100",
1401                            "process_id": "123"
1402                        }
1403                    }
1404                }
1405            }"#,
1406        )
1407        .unwrap();
1408
1409        let ds_config = DataScrubbingConfig {
1410            scrub_data: true,
1411            scrub_defaults: true,
1412            ..Default::default()
1413        };
1414        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1415        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1416
1417        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1418        assert_annotated_snapshot!(span);
1419    }
1420
1421    #[test]
1422    fn test_scrub_span_data_untyped_props_are_scrubbed() {
1423        let mut span: Annotated<Span> = Annotated::from_json(
1424            r#"{
1425                "data": {
1426                    "untyped": "ccnumber=5105105105105100",
1427                    "more_untyped": {
1428                        "typed": "no",
1429                        "scrubbed": "yes",
1430                        "ccnumber": "5105105105105100"
1431                    }
1432                }
1433            }"#,
1434        )
1435        .unwrap();
1436
1437        let ds_config = DataScrubbingConfig {
1438            scrub_data: true,
1439            scrub_defaults: true,
1440            ..Default::default()
1441        };
1442        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1443        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1444
1445        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1446        assert_annotated_snapshot!(span);
1447    }
1448
1449    #[test]
1450    fn test_span_data_pii() {
1451        let mut span = Span::from_value(
1452            json!({
1453                "data": {
1454                    "code.filepath": "src/sentry/api/authentication.py",
1455                }
1456            })
1457            .into(),
1458        );
1459
1460        let ds_config = DataScrubbingConfig {
1461            scrub_data: true,
1462            scrub_defaults: true,
1463            ..Default::default()
1464        };
1465        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1466
1467        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1468        processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1469        assert_eq!(
1470            get_value!(span.data.code_filepath!).as_str(),
1471            Some("src/sentry/api/authentication.py")
1472        );
1473    }
1474
1475    #[test]
1476    fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1477        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1478            r#"{
1479                "data": {
1480                    "http": {
1481                        "query": "dance=true"
1482                    }
1483                }
1484            }"#,
1485        )
1486        .unwrap();
1487
1488        let ds_config = DataScrubbingConfig {
1489            scrub_data: true,
1490            scrub_defaults: true,
1491            ..Default::default()
1492        };
1493        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1494        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1495        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1496        assert_annotated_snapshot!(breadcrumb);
1497    }
1498
1499    #[test]
1500    fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1501        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1502            r#"{
1503                "data": {
1504                    "http": {
1505                        "query": "ccnumber=5105105105105100&process_id=123",
1506                        "fragment": "ccnumber=5105105105105100,process_id=123"
1507                    }
1508                }
1509            }"#,
1510        )
1511        .unwrap();
1512
1513        let ds_config = DataScrubbingConfig {
1514            scrub_data: true,
1515            scrub_defaults: true,
1516            ..Default::default()
1517        };
1518        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1519        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1520        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1521        assert_annotated_snapshot!(breadcrumb);
1522    }
1523
1524    #[test]
1525    fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1526        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1527            r#"{
1528                "data": {
1529                    "http": {
1530                        "query": {
1531                            "ccnumber": "5105105105105100",
1532                            "process_id": "123"
1533                        },
1534                        "fragment": {
1535                            "ccnumber": "5105105105105100",
1536                            "process_id": "123"
1537                        }
1538                    }
1539                }
1540            }"#,
1541        )
1542        .unwrap();
1543
1544        let ds_config = DataScrubbingConfig {
1545            scrub_data: true,
1546            scrub_defaults: true,
1547            ..Default::default()
1548        };
1549        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1550        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1551
1552        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1553        assert_annotated_snapshot!(breadcrumb);
1554    }
1555
1556    #[test]
1557    fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1558        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1559            r#"{
1560                "data": {
1561                    "untyped": "ccnumber=5105105105105100",
1562                    "more_untyped": {
1563                        "typed": "no",
1564                        "scrubbed": "yes",
1565                        "ccnumber": "5105105105105100"
1566                    }
1567                }
1568            }"#,
1569        )
1570        .unwrap();
1571
1572        let ds_config = DataScrubbingConfig {
1573            scrub_data: true,
1574            scrub_defaults: true,
1575            ..Default::default()
1576        };
1577        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1578        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1579        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1580        assert_annotated_snapshot!(breadcrumb);
1581    }
1582
1583    #[test]
1584    fn test_scrub_graphql_response_data_with_variables() {
1585        let mut data = Event::from_value(
1586            json!({
1587              "request": {
1588                "data": {
1589                  "query": "{\n  viewer {\n    login\n  }\n}",
1590                  "variables": {
1591                    "login": "foo"
1592                  }
1593                },
1594                "api_target": "graphql"
1595              },
1596              "contexts": {
1597                "response": {
1598                  "type": "response",
1599                  "data": {
1600                    "data": {
1601                      "viewer": {
1602                        "login": "foo"
1603                      }
1604                    }
1605                  }
1606                }
1607              }
1608            })
1609            .into(),
1610        );
1611
1612        scrub_graphql(data.value_mut().as_mut().unwrap());
1613
1614        assert_debug_snapshot!(&data);
1615    }
1616
1617    #[test]
1618    fn test_scrub_graphql_response_data_without_variables() {
1619        let mut data = Event::from_value(
1620            json!({
1621              "request": {
1622                "data": {
1623                  "query": "{\n  viewer {\n    login\n  }\n}"
1624                },
1625                "api_target": "graphql"
1626              },
1627              "contexts": {
1628                "response": {
1629                  "type": "response",
1630                  "data": {
1631                    "data": {
1632                      "viewer": {
1633                        "login": "foo"
1634                      }
1635                    }
1636                  }
1637                }
1638              }
1639            })
1640            .into(),
1641        );
1642
1643        scrub_graphql(data.value_mut().as_mut().unwrap());
1644        assert_debug_snapshot!(&data);
1645    }
1646
1647    #[test]
1648    fn test_does_not_scrub_if_no_graphql() {
1649        let mut data = Event::from_value(
1650            json!({
1651              "request": {
1652                "data": {
1653                  "query": "{\n  viewer {\n    login\n  }\n}",
1654                  "variables": {
1655                    "login": "foo"
1656                  }
1657                },
1658              },
1659              "contexts": {
1660                "response": {
1661                  "type": "response",
1662                  "data": {
1663                    "data": {
1664                      "viewer": {
1665                        "login": "foo"
1666                      }
1667                    }
1668                  }
1669                }
1670              }
1671            })
1672            .into(),
1673        );
1674
1675        let scrubbing_config = DataScrubbingConfig {
1676            scrub_data: true,
1677            scrub_ip_addresses: true,
1678            scrub_defaults: true,
1679            ..Default::default()
1680        };
1681
1682        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1683        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1684
1685        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1686
1687        assert_debug_snapshot!(&data);
1688    }
1689
1690    #[test]
1691    fn test_logentry_params_scrubbed() {
1692        let config = serde_json::from_str::<PiiConfig>(
1693            r##"
1694                {
1695                    "applications": {
1696                        "$string": ["@anything:remove"]
1697                    }
1698                }
1699                "##,
1700        )
1701        .unwrap();
1702
1703        let mut event = Annotated::new(Event {
1704            logentry: Annotated::new(LogEntry {
1705                message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
1706                formatted: Annotated::new("failed to parse report id=1".to_string().into()),
1707                params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
1708                    "12345".to_owned(),
1709                ))])),
1710                ..Default::default()
1711            }),
1712            ..Default::default()
1713        });
1714
1715        let mut processor = PiiProcessor::new(config.compiled());
1716        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1717
1718        let params = get_value!(event.logentry.params!);
1719        assert_debug_snapshot!(params, @r#"Array(
1720    [
1721        Meta {
1722            remarks: [
1723                Remark {
1724                    ty: Removed,
1725                    rule_id: "@anything:remove",
1726                    range: None,
1727                },
1728            ],
1729            errors: [],
1730            original_length: None,
1731            original_value: None,
1732        },
1733    ],
1734)"#);
1735    }
1736
1737    #[test]
1738    fn test_is_pairlist() {
1739        for (case, expected) in [
1740            (r#"[]"#, false),
1741            (r#"["foo"]"#, false),
1742            (r#"["foo", 123]"#, false),
1743            (r#"[[1, "foo"]]"#, false),
1744            (r#"[[["too_nested", 123]]]"#, false),
1745            (r#"[["foo", "bar"], [1, "foo"]]"#, false),
1746            (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
1747            (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
1748            (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
1749            (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
1750            (r#"[["foo", 123]]"#, true),
1751            (r#"[["foo", "bar"]]"#, true),
1752            (
1753                r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
1754                true,
1755            ),
1756        ] {
1757            let v = Annotated::<Value>::from_json(case).unwrap();
1758            let Annotated(Some(Value::Array(mut a)), _) = v else {
1759                panic!()
1760            };
1761            assert_eq!(is_pairlist(&mut a), expected, "{case}");
1762        }
1763    }
1764
1765    #[test]
1766    fn test_tuple_array_scrubbed_with_path_selector() {
1767        // We expect that both of these configs express the same semantics.
1768        let configs = vec![
1769            // This configuration matches on the authorization element (the 1st element of the array
1770            // represents the key).
1771            r##"
1772                {
1773                    "applications": {
1774                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
1775                    }
1776                }
1777                "##,
1778            // This configuration matches on the 2nd element of the array.
1779            r##"
1780                {
1781                    "applications": {
1782                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
1783                    }
1784                }
1785                "##,
1786        ];
1787
1788        let mut event = Event::from_value(
1789            serde_json::json!(
1790            {
1791              "message": "hi",
1792              "exception": {
1793                "values": [
1794                  {
1795                    "type": "BrokenException",
1796                    "value": "Something failed",
1797                    "stacktrace": {
1798                      "frames": [
1799                        {
1800                            "vars": {
1801                                "headers": [
1802                                    ["authorization", "Bearer abc123"]
1803                                ]
1804                            }
1805                        }
1806                      ]
1807                    }
1808                  }
1809                ]
1810              }
1811            })
1812            .into(),
1813        );
1814
1815        for config in configs {
1816            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
1817            let mut processor = PiiProcessor::new(config.compiled());
1818            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1819
1820            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
1821
1822            allow_duplicates!(assert_debug_snapshot!(vars, @r#"
1823        FrameVars(
1824            {
1825                "headers": Array(
1826                    [
1827                        Array(
1828                            [
1829                                String(
1830                                    "authorization",
1831                                ),
1832                                Annotated(
1833                                    String(
1834                                        "[Filtered]",
1835                                    ),
1836                                    Meta {
1837                                        remarks: [
1838                                            Remark {
1839                                                ty: Substituted,
1840                                                rule_id: "@anything:replace",
1841                                                range: Some(
1842                                                    (
1843                                                        0,
1844                                                        10,
1845                                                    ),
1846                                                ),
1847                                            },
1848                                        ],
1849                                        errors: [],
1850                                        original_length: Some(
1851                                            13,
1852                                        ),
1853                                        original_value: None,
1854                                    },
1855                                ),
1856                            ],
1857                        ),
1858                    ],
1859                ),
1860            },
1861        )
1862        "#));
1863        }
1864    }
1865
1866    #[test]
1867    fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
1868        let config = serde_json::from_str::<PiiConfig>(
1869            r##"
1870                {
1871                    "applications": {
1872                        "$string": ["@password:remove"]
1873                    }
1874                }
1875                "##,
1876        )
1877        .unwrap();
1878
1879        let mut event = Event::from_value(
1880            serde_json::json!(
1881            {
1882              "message": "hi",
1883              "exception": {
1884                "values": [
1885                  {
1886                    "type": "BrokenException",
1887                    "value": "Something failed",
1888                    "stacktrace": {
1889                      "frames": [
1890                        {
1891                            "vars": {
1892                                "headers": [
1893                                    ["authorization", "abc123"]
1894                                ]
1895                            }
1896                        }
1897                      ]
1898                    }
1899                  }
1900                ]
1901              }
1902            })
1903            .into(),
1904        );
1905
1906        let mut processor = PiiProcessor::new(config.compiled());
1907        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1908
1909        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
1910
1911        assert_debug_snapshot!(vars, @r###"
1912        FrameVars(
1913            {
1914                "headers": Array(
1915                    [
1916                        Array(
1917                            [
1918                                String(
1919                                    "authorization",
1920                                ),
1921                                Meta {
1922                                    remarks: [
1923                                        Remark {
1924                                            ty: Removed,
1925                                            rule_id: "@password:remove",
1926                                            range: None,
1927                                        },
1928                                    ],
1929                                    errors: [],
1930                                    original_length: None,
1931                                    original_value: None,
1932                                },
1933                            ],
1934                        ),
1935                    ],
1936                ),
1937            },
1938        )
1939        "###);
1940    }
1941}