relay_pii/
processor.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8    self, Chunk, Pii, ProcessValue, ProcessingAction, ProcessingResult, ProcessingState, Processor,
9    ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12    AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
22/// A processor that performs PII stripping.
23pub struct PiiProcessor<'a> {
24    compiled_config: &'a CompiledPiiConfig,
25}
26
27impl<'a> PiiProcessor<'a> {
28    /// Creates a new processor based on a config.
29    pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
30        // this constructor needs to be cheap... a new PiiProcessor is created for each event. Move
31        // any init logic into CompiledPiiConfig::new.
32        PiiProcessor { compiled_config }
33    }
34
35    fn apply_all_rules(
36        &self,
37        meta: &mut Meta,
38        state: &ProcessingState<'_>,
39        mut value: Option<&mut String>,
40    ) -> ProcessingResult {
41        let pii = state.attrs().pii;
42        if pii == Pii::False {
43            return Ok(());
44        }
45
46        for (selector, rules) in self.compiled_config.applications.iter() {
47            if selector.matches_path(&state.path()) {
48                #[allow(clippy::needless_option_as_deref)]
49                for rule in rules {
50                    let reborrowed_value = value.as_deref_mut();
51                    apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
52                }
53            }
54        }
55
56        Ok(())
57    }
58}
59
60impl Processor for PiiProcessor<'_> {
61    fn before_process<T: ProcessValue>(
62        &mut self,
63        value: Option<&T>,
64        meta: &mut Meta,
65        state: &ProcessingState<'_>,
66    ) -> ProcessingResult {
67        if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
68            // Also apply pii scrubbing to the original value (set by normalization or other processors),
69            // such that we do not leak sensitive data through meta. Deletes `original_value` if an Error
70            // value is returned.
71            if let Some(parent) = state.iter().next() {
72                let path = state.path();
73                let new_state = parent.enter_borrowed(
74                    path.key().unwrap_or(""),
75                    Some(Cow::Borrowed(state.attrs())),
76                    enum_set!(ValueType::String),
77                );
78
79                if self
80                    .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
81                    .is_err()
82                {
83                    // `apply_all_rules` returned `DeleteValueHard` or `DeleteValueSoft`, so delete the original as well.
84                    meta.set_original_value(Option::<String>::None);
85                }
86            }
87        }
88
89        // booleans cannot be PII, and strings are handled in process_string
90        if state.value_type().contains(ValueType::Boolean)
91            || state.value_type().contains(ValueType::String)
92        {
93            return Ok(());
94        }
95
96        if value.is_none() {
97            return Ok(());
98        }
99
100        // apply rules based on key/path
101        self.apply_all_rules(meta, state, None)
102    }
103
104    fn process_array<T>(
105        &mut self,
106        array: &mut Array<T>,
107        _meta: &mut Meta,
108        state: &ProcessingState<'_>,
109    ) -> ProcessingResult
110    where
111        T: ProcessValue,
112    {
113        if is_pairlist(array) {
114            for annotated in array {
115                let mut mapped = mem::take(annotated).map_value(T::into_value);
116
117                if let Some(Value::Array(pair)) = mapped.value_mut() {
118                    let mut value = mem::take(&mut pair[1]);
119                    let value_type = ValueType::for_field(&value);
120
121                    if let Some(key_name) = &pair[0].as_str() {
122                        // We enter the key of the first element of the array, since we treat it
123                        // as a pair.
124                        let key_state =
125                            state.enter_borrowed(key_name, state.inner_attrs(), value_type);
126                        // We process the value with a state that "simulates" the first value of the
127                        // array as if it was the key of a dictionary.
128                        process_value(&mut value, self, &key_state)?;
129                    }
130
131                    // Put value back into pair.
132                    pair[1] = value;
133                }
134
135                // Put pair back into array.
136                *annotated = T::from_value(mapped);
137            }
138
139            Ok(())
140        } else {
141            // If we didn't find a pairlist, we can process child values as normal.
142            array.process_child_values(self, state)
143        }
144    }
145
146    fn process_string(
147        &mut self,
148        value: &mut String,
149        meta: &mut Meta,
150        state: &ProcessingState<'_>,
151    ) -> ProcessingResult {
152        if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
153            return Ok(());
154        }
155
156        // same as before_process. duplicated here because we can only check for "true",
157        // "false" etc in process_string.
158        self.apply_all_rules(meta, state, Some(value))
159    }
160
161    fn process_native_image_path(
162        &mut self,
163        NativeImagePath(value): &mut NativeImagePath,
164        meta: &mut Meta,
165        state: &ProcessingState<'_>,
166    ) -> ProcessingResult {
167        // In NativeImagePath we must not strip the file's basename because that would break
168        // processing.
169        //
170        // We pop the basename from the end of the string, call process_string and push the
171        // basename again.
172        //
173        // The ranges in Meta should still be right as long as we only pop/push from the end of the
174        // string. If we decide that we need to preserve anything other than suffixes all PII
175        // tooltips/annotations are potentially wrong.
176
177        if let Some(index) = value.rfind(['/', '\\']) {
178            let basename = value.split_off(index);
179            match self.process_string(value, meta, state) {
180                Ok(()) => value.push_str(&basename),
181                Err(ProcessingAction::DeleteValueHard) | Err(ProcessingAction::DeleteValueSoft) => {
182                    basename[1..].clone_into(value);
183                }
184                Err(ProcessingAction::InvalidTransaction(x)) => {
185                    return Err(ProcessingAction::InvalidTransaction(x));
186                }
187            }
188        }
189
190        Ok(())
191    }
192
193    fn process_pairlist<T: ProcessValue + AsPair>(
194        &mut self,
195        value: &mut PairList<T>,
196        _meta: &mut Meta,
197        state: &ProcessingState,
198    ) -> ProcessingResult {
199        utils::process_pairlist(self, value, state)
200    }
201
202    fn process_user(
203        &mut self,
204        user: &mut User,
205        _meta: &mut Meta,
206        state: &ProcessingState<'_>,
207    ) -> ProcessingResult {
208        let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
209
210        // Recurse into the user and does PII processing on fields.
211        user.process_child_values(self, state)?;
212
213        let has_other_fields = user.id.value().is_some()
214            || user.username.value().is_some()
215            || user.email.value().is_some();
216
217        let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
218
219        // If the IP address has become invalid as part of PII processing, we move it into the user
220        // ID. That ensures people can do IP hashing and still have a correct users-affected count.
221        //
222        // Right now both Snuba and EventUser discard unparseable IPs for indexing, and we assume
223        // we want to keep it that way.
224        //
225        // If there are any other fields set that take priority over the IP for uniquely
226        // identifying a user (has_other_fields), we do not want to do anything. The value will be
227        // wiped out in renormalization anyway.
228        if ip_was_valid && !has_other_fields && !ip_is_still_valid {
229            user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
230            user.ip_address.meta_mut().add_remark(Remark::new(
231                RemarkType::Removed,
232                "pii:ip_address".to_string(),
233            ));
234        }
235
236        Ok(())
237    }
238
239    // Replay PII processor entry point.
240    fn process_replay(
241        &mut self,
242        replay: &mut Replay,
243        _meta: &mut Meta,
244        state: &ProcessingState<'_>,
245    ) -> ProcessingResult {
246        replay.process_child_values(self, state)?;
247        Ok(())
248    }
249}
250
/// Visitor that records whether a value looks like a `[key, value]` pair.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited top-level value is an array with exactly two elements.
    is_pair: bool,
    /// Set when the first element of that array was visited as a string.
    has_string_key: bool,
}
256
257impl PairListProcessor {
258    /// Returns true if the processor identified the supplied data as an array composed of
259    /// a key (string) and a value.
260    fn is_pair_array(&self) -> bool {
261        self.is_pair && self.has_string_key
262    }
263}
264
265impl Processor for PairListProcessor {
266    fn process_array<T>(
267        &mut self,
268        value: &mut Array<T>,
269        _meta: &mut Meta,
270        state: &ProcessingState<'_>,
271    ) -> ProcessingResult
272    where
273        T: ProcessValue,
274    {
275        self.is_pair = state.depth() == 0 && value.len() == 2;
276        if self.is_pair {
277            let key_type = ValueType::for_field(&value[0]);
278            process_value(
279                &mut value[0],
280                self,
281                &state.enter_index(0, state.inner_attrs(), key_type),
282            )?;
283        }
284
285        Ok(())
286    }
287
288    fn process_string(
289        &mut self,
290        _value: &mut String,
291        _meta: &mut Meta,
292        state: &ProcessingState<'_>,
293    ) -> ProcessingResult where {
294        if state.depth() == 1 && state.path().index() == Some(0) {
295            self.has_string_key = true;
296        }
297
298        Ok(())
299    }
300}
301
302fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
303    for element in array.iter_mut() {
304        let mut visitor = PairListProcessor::default();
305        process_value(element, &mut visitor, ProcessingState::root()).ok();
306        if !visitor.is_pair_array() {
307            return false;
308        }
309    }
310
311    !array.is_empty()
312}
313
314/// Scrubs GraphQL variables from the event.
315pub fn scrub_graphql(event: &mut Event) {
316    let mut keys: BTreeSet<&str> = BTreeSet::new();
317
318    let mut is_graphql = false;
319
320    // Collect the variables keys and scrub them out.
321    if let Some(request) = event.request.value_mut() {
322        if let Some(Value::Object(data)) = request.data.value_mut() {
323            if let Some(api_target) = request.api_target.value() {
324                if api_target.eq_ignore_ascii_case("graphql") {
325                    is_graphql = true;
326                }
327            }
328
329            if is_graphql {
330                if let Some(Annotated(Some(Value::Object(variables)), _)) =
331                    data.get_mut("variables")
332                {
333                    for (key, value) in variables.iter_mut() {
334                        keys.insert(key);
335                        value.set_value(Some(Value::String("[Filtered]".to_string())));
336                    }
337                }
338            }
339        }
340    }
341
342    if !is_graphql {
343        return;
344    }
345
346    // Scrub PII from the data object if they match the variables keys.
347    if let Some(contexts) = event.contexts.value_mut() {
348        if let Some(response) = contexts.get_mut::<ResponseContext>() {
349            if let Some(Value::Object(data)) = response.data.value_mut() {
350                if let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
351                {
352                    if !keys.is_empty() {
353                        scrub_graphql_data(&keys, graphql_data);
354                    } else {
355                        // If we don't have the variable keys, we scrub the whole data object
356                        // because the query or mutation weren't parameterized.
357                        data.remove("data");
358                    }
359                }
360            }
361        }
362    }
363}
364
365/// Scrubs values from the data object to `[Filtered]`.
366fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
367    for (key, value) in data.iter_mut() {
368        match value.value_mut() {
369            Some(Value::Object(item_data)) => {
370                scrub_graphql_data(keys, item_data);
371            }
372            _ => {
373                if keys.contains(key.as_str()) {
374                    value.set_value(Some(Value::String("[Filtered]".to_string())));
375                }
376            }
377        }
378    }
379}
380
381fn apply_rule_to_value(
382    meta: &mut Meta,
383    rule: &RuleRef,
384    key: Option<&str>,
385    mut value: Option<&mut String>,
386) -> ProcessingResult {
387    // The rule might specify to remove or to redact. If redaction is chosen, we need to
388    // chunk up the value, otherwise we need to simply mark the value for deletion.
389    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
390
391    // In case the value is not a string (but a container, bool or number) and the rule matches on
392    // anything, we can only remove the value (not replace, hash, etc).
393    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
394        // The value is a container, @anything on a container can do nothing but delete.
395        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
396        return Err(ProcessingAction::DeleteValueHard);
397    }
398
399    macro_rules! apply_regex {
400        ($regex:expr, $replace_behavior:expr) => {
401            if let Some(ref mut value) = value {
402                processor::process_chunked_value(value, meta, |chunks| {
403                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
404                });
405            }
406        };
407    }
408
409    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
410        match pattern_type {
411            PatternType::KeyValue => {
412                if regex.is_match(key.unwrap_or("")) {
413                    if value.is_some() && should_redact_chunks {
414                        // If we're given a string value here, redact the value like we would with
415                        // @anything.
416                        apply_regex!(&ANYTHING_REGEX, replace_behavior);
417                    } else {
418                        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
419                        return Err(ProcessingAction::DeleteValueHard);
420                    }
421                } else {
422                    // If we did not redact using the key, we will redact the entire value if the key
423                    // appears in it.
424                    apply_regex!(regex, replace_behavior);
425                }
426            }
427            PatternType::Value => {
428                apply_regex!(regex, replace_behavior);
429            }
430        }
431    }
432
433    Ok(())
434}
435
436fn apply_regex_to_chunks<'a>(
437    chunks: Vec<Chunk<'a>>,
438    rule: &RuleRef,
439    regex: &Regex,
440    replace_behavior: ReplaceBehavior,
441) -> Vec<Chunk<'a>> {
442    // NB: This function allocates the entire string and all chunks a second time. This means it
443    // cannot reuse chunks and reallocates them. Ideally, we would be able to run the regex directly
444    // on the chunks, but the `regex` crate does not support that.
445
446    let mut search_string = String::new();
447    let mut has_text = false;
448    for chunk in &chunks {
449        match chunk {
450            Chunk::Text { text } => {
451                has_text = true;
452                search_string.push_str(&text.replace('\x00', ""));
453            }
454            Chunk::Redaction { .. } => search_string.push('\x00'),
455        }
456    }
457
458    if !has_text {
459        // Nothing to replace.
460        return chunks;
461    }
462
463    // Early exit if this regex does not match and return the original chunks.
464    let mut captures_iter = regex.captures_iter(&search_string).peekable();
465    if captures_iter.peek().is_none() {
466        return chunks;
467    }
468
469    let mut replacement_chunks = vec![];
470    for chunk in chunks {
471        if let Chunk::Redaction { .. } = chunk {
472            replacement_chunks.push(chunk);
473        }
474    }
475    replacement_chunks.reverse();
476
477    fn process_text<'a>(
478        text: &str,
479        rv: &mut Vec<Chunk<'a>>,
480        replacement_chunks: &mut Vec<Chunk<'a>>,
481    ) {
482        if text.is_empty() {
483            return;
484        }
485
486        static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
487        let regex = NULL_SPLIT_RE.get_or_init(|| {
488            #[allow(clippy::trivial_regex)]
489            Regex::new("\x00").unwrap()
490        });
491
492        let mut pos = 0;
493        for piece in regex.find_iter(text) {
494            rv.push(Chunk::Text {
495                text: Cow::Owned(text[pos..piece.start()].to_string()),
496            });
497            rv.push(replacement_chunks.pop().unwrap());
498            pos = piece.end();
499        }
500
501        rv.push(Chunk::Text {
502            text: Cow::Owned(text[pos..].to_string()),
503        });
504    }
505
506    let mut pos = 0;
507    let mut rv = Vec::with_capacity(replacement_chunks.len());
508
509    match replace_behavior {
510        ReplaceBehavior::Groups(ref groups) => {
511            for m in captures_iter {
512                for (idx, g) in m.iter().enumerate() {
513                    if let Some(g) = g {
514                        if groups.contains(&(idx as u8)) {
515                            process_text(
516                                &search_string[pos..g.start()],
517                                &mut rv,
518                                &mut replacement_chunks,
519                            );
520                            insert_replacement_chunks(rule, g.as_str(), &mut rv);
521                            pos = g.end();
522                        }
523                    }
524                }
525            }
526            process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
527            debug_assert!(replacement_chunks.is_empty());
528        }
529        ReplaceBehavior::Value => {
530            // We only want to replace a string value, and the replacement chunk for that is
531            // inserted by insert_replacement_chunks. Adding chunks from replacement_chunks
532            // results in the incorrect behavior of a total of more chunks than the input.
533            insert_replacement_chunks(rule, &search_string, &mut rv);
534        }
535    }
536    rv
537}
538
539fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
540    match &rule.redaction {
541        Redaction::Default | Redaction::Remove => {
542            output.push(Chunk::Redaction {
543                text: Cow::Borrowed(""),
544                rule_id: Cow::Owned(rule.origin.to_string()),
545                ty: RemarkType::Removed,
546            });
547        }
548        Redaction::Mask => {
549            let buf = vec!['*'; text.chars().count()];
550
551            output.push(Chunk::Redaction {
552                ty: RemarkType::Masked,
553                rule_id: Cow::Owned(rule.origin.to_string()),
554                text: buf.into_iter().collect(),
555            })
556        }
557        Redaction::Hash => {
558            output.push(Chunk::Redaction {
559                ty: RemarkType::Pseudonymized,
560                rule_id: Cow::Owned(rule.origin.to_string()),
561                text: Cow::Owned(utils::hash_value(text.as_bytes())),
562            });
563        }
564        Redaction::Replace(replace) => {
565            output.push(Chunk::Redaction {
566                ty: RemarkType::Substituted,
567                rule_id: Cow::Owned(rule.origin.to_string()),
568                text: Cow::Owned(replace.text.clone()),
569            });
570        }
571        Redaction::Other => relay_log::warn!("Incoming redaction is not supported"),
572    }
573}
574
575#[cfg(test)]
576mod tests {
577    use insta::{allow_duplicates, assert_debug_snapshot};
578    use relay_event_schema::processor::process_value;
579    use relay_event_schema::protocol::{
580        Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
581        NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
582    };
583    use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
584    use serde_json::json;
585
586    use super::*;
587    use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
588
589    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
590        use crate::convert::to_pii_config as to_pii_config_impl;
591        let rv = to_pii_config_impl(datascrubbing_config).unwrap();
592        if let Some(ref config) = rv {
593            let roundtrip: PiiConfig =
594                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
595            assert_eq!(&roundtrip, config);
596        }
597        rv
598    }
599
600    #[test]
601    fn test_scrub_original_value() {
602        let mut data = Event::from_value(
603            json!({
604                "user": {
605                    "username": "hey  man 73.133.27.120", // should be stripped despite not being "known ip field"
606                    "ip_address": "is this an ip address? 73.133.27.120", //  <--------
607                },
608                "hpkp":"invalid data my ip address is  74.133.27.120 and my credit card number is  4571234567890111 ",
609            })
610            .into(),
611        );
612
613        let scrubbing_config = DataScrubbingConfig {
614            scrub_data: true,
615            scrub_ip_addresses: true,
616            scrub_defaults: true,
617            ..Default::default()
618        };
619
620        let pii_config = to_pii_config(&scrubbing_config).unwrap();
621        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
622
623        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
624
625        assert_debug_snapshot!(&data);
626    }
627
628    #[test]
629    fn test_sentry_user() {
630        let mut data = Event::from_value(
631            json!({
632                "user": {
633                    "ip_address": "73.133.27.120",
634                    "sentry_user": "ip:73.133.27.120",
635                },
636            })
637            .into(),
638        );
639
640        let scrubbing_config = DataScrubbingConfig {
641            scrub_data: true,
642            scrub_ip_addresses: true,
643            scrub_defaults: true,
644            ..Default::default()
645        };
646
647        let pii_config = to_pii_config(&scrubbing_config).unwrap();
648        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
649
650        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
651
652        assert_debug_snapshot!(&data);
653    }
654
655    #[test]
656    fn test_basic_stripping() {
657        let config = serde_json::from_str::<PiiConfig>(
658            r#"
659            {
660                "rules": {
661                    "remove_bad_headers": {
662                        "type": "redact_pair",
663                        "keyPattern": "(?i)cookie|secret[-_]?key"
664                    }
665                },
666                "applications": {
667                    "$string": ["@ip"],
668                    "$object.**": ["remove_bad_headers"]
669                }
670            }
671            "#,
672        )
673        .unwrap();
674
675        let mut event = Annotated::new(Event {
676            logentry: Annotated::new(LogEntry {
677                formatted: Annotated::new("Hello world!".to_string().into()),
678                ..Default::default()
679            }),
680            request: Annotated::new(Request {
681                env: {
682                    let mut rv = Object::new();
683                    rv.insert(
684                        "SECRET_KEY".to_string(),
685                        Annotated::new(Value::String("134141231231231231231312".into())),
686                    );
687                    Annotated::new(rv)
688                },
689                headers: {
690                    let rv = vec![
691                        Annotated::new((
692                            Annotated::new("Cookie".to_string().into()),
693                            Annotated::new("super secret".to_string().into()),
694                        )),
695                        Annotated::new((
696                            Annotated::new("X-Forwarded-For".to_string().into()),
697                            Annotated::new("127.0.0.1".to_string().into()),
698                        )),
699                    ];
700                    Annotated::new(Headers(PairList(rv)))
701                },
702                ..Default::default()
703            }),
704            tags: Annotated::new(Tags(
705                vec![Annotated::new(TagEntry(
706                    Annotated::new("forwarded_for".to_string()),
707                    Annotated::new("127.0.0.1".to_string()),
708                ))]
709                .into(),
710            )),
711            ..Default::default()
712        });
713
714        let mut processor = PiiProcessor::new(config.compiled());
715        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
716        assert_annotated_snapshot!(event);
717    }
718
719    #[test]
720    fn test_redact_containers() {
721        let config = serde_json::from_str::<PiiConfig>(
722            r#"
723            {
724                "applications": {
725                    "$object": ["@anything"]
726                }
727            }
728            "#,
729        )
730        .unwrap();
731
732        let mut event = Annotated::new(Event {
733            extra: {
734                let mut map = Object::new();
735                map.insert(
736                    "foo".to_string(),
737                    Annotated::new(ExtraValue(Value::String("bar".to_string()))),
738                );
739                Annotated::new(map)
740            },
741            ..Default::default()
742        });
743
744        let mut processor = PiiProcessor::new(config.compiled());
745        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
746        assert_annotated_snapshot!(event);
747    }
748
749    #[test]
750    fn test_redact_custom_pattern() {
751        let config = serde_json::from_str::<PiiConfig>(
752            r#"
753            {
754                "applications": {
755                    "$string": ["myrule"]
756                },
757                "rules": {
758                    "myrule": {
759                        "type": "pattern",
760                        "pattern": "foo",
761                        "redaction": {
762                            "method": "replace",
763                            "text": "asd"
764                        }
765                    }
766                }
767            }
768            "#,
769        )
770        .unwrap();
771
772        let mut event = Annotated::new(Event {
773            extra: {
774                let mut map = Object::new();
775                map.insert(
776                    "myvalue".to_string(),
777                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
778                );
779                Annotated::new(map)
780            },
781            ..Default::default()
782        });
783
784        let mut processor = PiiProcessor::new(config.compiled());
785        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
786        assert_annotated_snapshot!(event);
787    }
788
789    #[test]
790    fn test_no_field_upsert() {
791        let config = serde_json::from_str::<PiiConfig>(
792            r#"
793            {
794                "applications": {
795                    "**": ["@anything:remove"]
796                }
797            }
798            "#,
799        )
800        .unwrap();
801
802        let mut event = Annotated::new(Event {
803            extra: {
804                let mut map = Object::new();
805                map.insert(
806                    "myvalue".to_string(),
807                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
808                );
809                Annotated::new(map)
810            },
811            ..Default::default()
812        });
813
814        let mut processor = PiiProcessor::new(config.compiled());
815        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
816        assert_annotated_snapshot!(event);
817    }
818
819    #[test]
820    fn test_anything_hash_on_string() {
821        let config = serde_json::from_str::<PiiConfig>(
822            r#"
823            {
824                "applications": {
825                    "$string": ["@anything:hash"]
826                }
827            }
828            "#,
829        )
830        .unwrap();
831
832        let mut event = Annotated::new(Event {
833            extra: {
834                let mut map = Object::new();
835                map.insert(
836                    "myvalue".to_string(),
837                    Annotated::new(ExtraValue(Value::String("foobar".to_string()))),
838                );
839                Annotated::new(map)
840            },
841            ..Default::default()
842        });
843
844        let mut processor = PiiProcessor::new(config.compiled());
845        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
846        assert_annotated_snapshot!(event);
847    }
848
/// Applies `@anything:hash` through the `$object` selector to an event whose `extra` holds a
/// single string entry, then snapshots the processed event.
#[test]
fn test_anything_hash_on_container() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "$object": ["@anything:hash"]
                }
            }
            "#,
    )
    .unwrap();

    // Same payload as the `$string` variant: one string value under `extra.myvalue`.
    let mut extra = Object::new();
    extra.insert(
        String::from("myvalue"),
        Annotated::new(ExtraValue(Value::String(String::from("foobar")))),
    );

    let mut event = Annotated::new(Event {
        extra: Annotated::new(extra),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event);
}
878
/// Runs the processor derived from a `DataScrubbingConfig` (data, IP-address and default
/// scrubbing enabled) over request headers whose values are IP-like strings, then snapshots
/// the result.
#[test]
fn test_ignore_user_agent_ip_scrubbing() {
    let payload = json!({
        "request": {
            "headers": [
                ["User-Agent", "127.0.0.1"],
                ["X-Client-Ip", "10.0.0.1"]
            ]
        },
    });
    let mut data = Event::from_value(payload.into());

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_ip_addresses: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let pii_config = to_pii_config(&scrubbing_config).unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut data, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(&data);
}
907
/// Targets `code_file` and `debug_file` of every debug image with `@anything:remove` and
/// snapshots the processed event.
#[test]
fn test_remove_debugmeta_path() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "debug_meta.images.*.code_file": ["@anything:remove"],
                    "debug_meta.images.*.debug_file": ["@anything:remove"]
                }
            }
            "#,
    )
    .unwrap();

    // Additional, untyped attribute attached to the image.
    let mut other = Object::new();
    other.insert(
        String::from("other"),
        Annotated::new(Value::String(String::from("value"))),
    );

    let image = NativeDebugImage {
        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
        debug_id: Annotated::new("971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap()),
        debug_file: Annotated::new("wntdll.pdb".into()),
        debug_checksum: Annotated::empty(),
        arch: Annotated::new(String::from("arm64")),
        image_addr: Annotated::new(Addr(0)),
        image_size: Annotated::new(4096),
        image_vmaddr: Annotated::new(Addr(32768)),
        other,
    };

    let debug_meta = DebugMeta {
        images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(image)))]),
        ..Default::default()
    };

    let mut event = Annotated::new(Event {
        debug_meta: Annotated::new(debug_meta),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event);
}
956
/// Targets `code_file` and `debug_file` of every debug image with `@anything:replace` and
/// snapshots the processed event.
#[test]
fn test_replace_debugmeta_path() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "debug_meta.images.*.code_file": ["@anything:replace"],
                    "debug_meta.images.*.debug_file": ["@anything:replace"]
                }
            }
            "#,
    )
    .unwrap();

    // Additional, untyped attribute attached to the image.
    let mut other = Object::new();
    other.insert(
        String::from("other"),
        Annotated::new(Value::String(String::from("value"))),
    );

    let image = NativeDebugImage {
        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
        debug_id: Annotated::new("971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap()),
        debug_file: Annotated::new("wntdll.pdb".into()),
        debug_checksum: Annotated::empty(),
        arch: Annotated::new(String::from("arm64")),
        image_addr: Annotated::new(Addr(0)),
        image_size: Annotated::new(4096),
        image_vmaddr: Annotated::new(Addr(32768)),
        other,
    };

    let debug_meta = DebugMeta {
        images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(image)))]),
        ..Default::default()
    };

    let mut event = Annotated::new(Event {
        debug_meta: Annotated::new(debug_meta),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event);
}
1005
/// Targets `code_file` and `debug_file` of every debug image with `@anything:hash` and
/// snapshots the processed event.
#[test]
fn test_hash_debugmeta_path() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "debug_meta.images.*.code_file": ["@anything:hash"],
                    "debug_meta.images.*.debug_file": ["@anything:hash"]
                }
            }
            "#,
    )
    .unwrap();

    // Additional, untyped attribute attached to the image.
    let mut other = Object::new();
    other.insert(
        String::from("other"),
        Annotated::new(Value::String(String::from("value"))),
    );

    let image = NativeDebugImage {
        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
        debug_id: Annotated::new("971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap()),
        debug_file: Annotated::new("wntdll.pdb".into()),
        debug_checksum: Annotated::empty(),
        arch: Annotated::new(String::from("arm64")),
        image_addr: Annotated::new(Addr(0)),
        image_size: Annotated::new(4096),
        image_vmaddr: Annotated::new(Addr(32768)),
        other,
    };

    let debug_meta = DebugMeta {
        images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(image)))]),
        ..Default::default()
    };

    let mut event = Annotated::new(Event {
        debug_meta: Annotated::new(debug_meta),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event);
}
1054
/// Applies `@anything:remove` through four broad selectors (`$string`, `**`, `debug_meta.**`
/// and a conjunction using `**`) to an event carrying one native debug image, then snapshots
/// the processed event.
#[test]
fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "$string": ["@anything:remove"],
                    "**": ["@anything:remove"],
                    "debug_meta.**": ["@anything:remove"],
                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
                }
            }
            "#,
    )
    .unwrap();

    // Additional, untyped attribute attached to the image.
    let mut other = Object::new();
    other.insert(
        String::from("other"),
        Annotated::new(Value::String(String::from("value"))),
    );

    let image = NativeDebugImage {
        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
        debug_id: Annotated::new("971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap()),
        debug_file: Annotated::new("wntdll.pdb".into()),
        debug_checksum: Annotated::empty(),
        arch: Annotated::new(String::from("arm64")),
        image_addr: Annotated::new(Addr(0)),
        image_size: Annotated::new(4096),
        image_vmaddr: Annotated::new(Addr(32768)),
        other,
    };

    let debug_meta = DebugMeta {
        images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(image)))]),
        ..Default::default()
    };

    let mut event = Annotated::new(Event {
        debug_meta: Annotated::new(debug_meta),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event);
}
1105
/// Uses a quoted selector key containing punctuation, doubled single quotes and non-ASCII
/// characters against two similarly shaped `extra` keys, then snapshots the processed event.
#[test]
fn test_quoted_keys() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
                }
            }
            "#,
    )
    .unwrap();

    // Two entries with awkward keys; the selector above names only the second one.
    let mut extra = Object::new();
    for (key, value) in [
        ("do not ,./<>?!@#$%^&*())'ßtrip'", "foo"),
        ("special ,./<>?!@#$%^&*())'gärbage'", "bar"),
    ] {
        extra.insert(
            key.to_owned(),
            Annotated::new(ExtraValue(Value::String(value.to_owned()))),
        );
    }

    let mut event = Annotated::new(Event {
        extra: Annotated::new(extra),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event);
}
1139
/// Checks that `logentry.formatted` can be addressed as `$logentry.formatted`, `$message`,
/// the conjunction of both, and `$string`: each selector must remove the value.
#[test]
fn test_logentry_value_types() {
    for formatted_selector in [
        "$logentry.formatted",
        "$message",
        "$logentry.formatted && $message",
        "$string",
    ] {
        let raw_config = format!(
            r##"
                {{
                    "applications": {{
                        "{formatted_selector}": ["@anything:remove"]
                    }}
                }}
                "##
        );
        let pii_config: PiiConfig = serde_json::from_str(&raw_config).unwrap();

        let logentry = LogEntry {
            formatted: Annotated::new("Hello world!".to_owned().into()),
            ..Default::default()
        };
        let mut event = Annotated::new(Event {
            logentry: Annotated::new(logentry),
            ..Default::default()
        });

        let mut scrubber = PiiProcessor::new(pii_config.compiled());
        process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

        // The event and its logentry must survive; only `formatted` has to be gone.
        let scrubbed_event = event.value().unwrap();
        let scrubbed_logentry = scrubbed_event.logentry.value().unwrap();
        assert!(scrubbed_logentry.formatted.value().is_none());
    }
}
1184
/// Hashes `$user.ip_address` with `@ip:hash` for a user that has no `id`; afterwards the test
/// expects `ip_address` to be empty and `id` to hold the hash string.
#[test]
fn test_ip_address_hashing() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "$user.ip_address": ["@ip:hash"]
                }
            }
            "#,
    )
    .unwrap();

    let original_user = User {
        ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        user: Annotated::new(original_user),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let scrubbed_user = event.value().unwrap().user.value().unwrap();

    assert!(scrubbed_user.ip_address.value().is_none());

    assert_eq!(
        scrubbed_user.id.value().unwrap().as_str(),
        "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
    );
}
1218
/// Hashes `$user.ip_address` with `@ip:hash` for a user that already has an `id`; the test
/// expects the hash to stay in `ip_address` and the existing `id` to remain `"123"`.
#[test]
fn test_ip_address_hashing_does_not_overwrite_id() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
            {
                "applications": {
                    "$user.ip_address": ["@ip:hash"]
                }
            }
            "#,
    )
    .unwrap();

    let original_user = User {
        id: Annotated::new("123".to_owned().into()),
        ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        user: Annotated::new(original_user),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let scrubbed_user = event.value().unwrap().user.value().unwrap();

    // This will get wiped out in renormalization though
    assert_eq!(
        scrubbed_user.ip_address.value().unwrap().as_str(),
        "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
    );

    assert_eq!(scrubbed_user.id.value().unwrap().as_str(), "123");
}
1254
/// Re-applies an `@ip` replace rule, with a match-everything regex and
/// `ReplaceBehavior::Value`, to chunks that already consist of a single `[ip]` redaction and
/// expects them back unchanged.
#[test]
fn test_replace_replaced_text() {
    let chunks = vec![Chunk::Redaction {
        text: "[ip]".into(),
        rule_id: "@ip".into(),
        ty: RemarkType::Substituted,
    }];

    let replacement = ReplaceRedaction {
        text: "[ip]".into(),
    };
    let rule = RuleRef {
        id: "@ip:replace".into(),
        origin: "@ip".into(),
        ty: RuleType::Ip,
        redaction: Redaction::Replace(replacement),
    };

    let match_everything = Regex::new(r#".*"#).unwrap();
    let processed = apply_regex_to_chunks(
        chunks.clone(),
        &rule,
        &match_everything,
        ReplaceBehavior::Value,
    );

    assert_eq!(chunks, processed);
}
1278
/// Re-applies an `@anything:filter` replace rule, with a match-everything regex and group 0
/// as the replace target, to chunks that already consist of a single `[Filtered]` redaction
/// and expects them back unchanged.
#[test]
fn test_replace_replaced_text_anything() {
    let chunks = vec![Chunk::Redaction {
        text: "[Filtered]".into(),
        rule_id: "@password:filter".into(),
        ty: RemarkType::Substituted,
    }];

    let replacement = ReplaceRedaction {
        text: "[Filtered]".into(),
    };
    let rule = RuleRef {
        id: "@anything:filter".into(),
        origin: "@anything:filter".into(),
        ty: RuleType::Anything,
        redaction: Redaction::Replace(replacement),
    };

    let match_everything = Regex::new(r#".*"#).unwrap();
    let processed = apply_regex_to_chunks(
        chunks.clone(),
        &rule,
        &match_everything,
        ReplaceBehavior::Groups(smallvec::smallvec![0]),
    );

    assert_eq!(chunks, processed);
}
1302
/// Processes a trace context whose `data.previousRoute.params` contains a `password` entry
/// with the default data-scrubbing rules and snapshots the result.
#[test]
fn test_trace_route_params_scrubbed() {
    let mut trace_context = Annotated::<TraceContext>::from_json(
        r#"
            {
                "type": "trace",
                "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
                "span_id": "fa90fdead5f74052",
                "data": {
                    "previousRoute": {
                        "params": {
                            "password": "test"
                        }
                    }
                }
            }
            "#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    // Both the conversion and the presence of a resulting config are required here.
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut trace_context, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(trace_context);
}
1339
/// Processes a span whose `data.http.query` is the string `dance=true` with the default
/// data-scrubbing rules and snapshots the result.
#[test]
fn test_scrub_span_data_http_not_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
                "data": {
                    "http": {
                        "query": "dance=true"
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(span);
}
1364
/// Processes a span whose `data.http.query` and `data.http.fragment` are strings embedding a
/// `ccnumber=...` pair with the default data-scrubbing rules and snapshots the result.
#[test]
fn test_scrub_span_data_http_strings_are_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
                "data": {
                    "http": {
                        "query": "ccnumber=5105105105105100&process_id=123",
                        "fragment": "ccnumber=5105105105105100,process_id=123"
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(span);
}
1390
/// Processes a span whose `data.http.query` and `data.http.fragment` are objects with a
/// `ccnumber` key with the default data-scrubbing rules and snapshots the result.
#[test]
fn test_scrub_span_data_http_objects_are_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
                "data": {
                    "http": {
                        "query": {
                            "ccnumber": "5105105105105100",
                            "process_id": "123"
                        },
                        "fragment": {
                            "ccnumber": "5105105105105100",
                            "process_id": "123"
                        }
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(span);
}
1422
/// Processes a span whose `data` carries the ad-hoc keys `untyped` and `more_untyped`, both
/// containing a credit-card-like number, with the default data-scrubbing rules and snapshots
/// the result.
#[test]
fn test_scrub_span_data_untyped_props_are_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
                "data": {
                    "untyped": "ccnumber=5105105105105100",
                    "more_untyped": {
                        "typed": "no",
                        "scrubbed": "yes",
                        "ccnumber": "5105105105105100"
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(span);
}
1450
/// Processes a span with a `code.filepath` data entry using the default data-scrubbing rules
/// and expects `span.data.code_filepath` to still hold the original path afterwards.
#[test]
fn test_span_data_pii() {
    let payload = json!({
        "data": {
            "code.filepath": "src/sentry/api/authentication.py",
        }
    });
    let mut span = Span::from_value(payload.into());

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    processor::process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();

    let code_filepath = get_value!(span.data.code_filepath!);
    assert_eq!(
        code_filepath.as_str(),
        Some("src/sentry/api/authentication.py")
    );
}
1476
/// Processes a breadcrumb whose `data.http.query` is the string `dance=true` with the default
/// data-scrubbing rules and snapshots the result.
#[test]
fn test_scrub_breadcrumb_data_http_not_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
                "data": {
                    "http": {
                        "query": "dance=true"
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(breadcrumb);
}
1500
/// Processes a breadcrumb whose `data.http.query` and `data.http.fragment` are strings
/// embedding a `ccnumber=...` pair with the default data-scrubbing rules and snapshots the
/// result.
#[test]
fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
                "data": {
                    "http": {
                        "query": "ccnumber=5105105105105100&process_id=123",
                        "fragment": "ccnumber=5105105105105100,process_id=123"
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(breadcrumb);
}
1525
/// Processes a breadcrumb whose `data.http.query` and `data.http.fragment` are objects with a
/// `ccnumber` key with the default data-scrubbing rules and snapshots the result.
#[test]
fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
                "data": {
                    "http": {
                        "query": {
                            "ccnumber": "5105105105105100",
                            "process_id": "123"
                        },
                        "fragment": {
                            "ccnumber": "5105105105105100",
                            "process_id": "123"
                        }
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(breadcrumb);
}
1557
/// Processes a breadcrumb whose `data` carries the ad-hoc keys `untyped` and `more_untyped`,
/// both containing a credit-card-like number, with the default data-scrubbing rules and
/// snapshots the result.
#[test]
fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
                "data": {
                    "untyped": "ccnumber=5105105105105100",
                    "more_untyped": {
                        "typed": "no",
                        "scrubbed": "yes",
                        "ccnumber": "5105105105105100"
                    }
                }
            }"#,
    )
    .unwrap();

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let derived_config = scrubbing_config.pii_config().unwrap();
    let pii_config = derived_config.as_ref().unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();

    assert_annotated_snapshot!(breadcrumb);
}
1584
/// Calls `scrub_graphql` directly on an event whose request targets `graphql`, carries
/// `variables`, and has a response context with nested `data`; the result is debug-snapshotted.
#[test]
fn test_scrub_graphql_response_data_with_variables() {
    let payload = json!({
        "request": {
            "data": {
                "query": "{\n  viewer {\n    login\n  }\n}",
                "variables": {
                    "login": "foo"
                }
            },
            "api_target": "graphql"
        },
        "contexts": {
            "response": {
                "type": "response",
                "data": {
                    "data": {
                        "viewer": {
                            "login": "foo"
                        }
                    }
                }
            }
        }
    });
    let mut data = Event::from_value(payload.into());

    // The event value must be present; scrubbing mutates it in place.
    let event = data.value_mut().as_mut().unwrap();
    scrub_graphql(event);

    assert_debug_snapshot!(&data);
}
1618
/// Calls `scrub_graphql` directly on an event whose request targets `graphql` but carries no
/// `variables`, with a response context holding nested `data`; the result is debug-snapshotted.
#[test]
fn test_scrub_graphql_response_data_without_variables() {
    let payload = json!({
        "request": {
            "data": {
                "query": "{\n  viewer {\n    login\n  }\n}"
            },
            "api_target": "graphql"
        },
        "contexts": {
            "response": {
                "type": "response",
                "data": {
                    "data": {
                        "viewer": {
                            "login": "foo"
                        }
                    }
                }
            }
        }
    });
    let mut data = Event::from_value(payload.into());

    // The event value must be present; scrubbing mutates it in place.
    let event = data.value_mut().as_mut().unwrap();
    scrub_graphql(event);

    assert_debug_snapshot!(&data);
}
1648
/// Runs the full PII processor (data, IP-address and default scrubbing enabled) over a
/// GraphQL-shaped event whose request has no `api_target`, then debug-snapshots the result.
#[test]
fn test_does_not_scrub_if_no_graphql() {
    let payload = json!({
        "request": {
            "data": {
                "query": "{\n  viewer {\n    login\n  }\n}",
                "variables": {
                    "login": "foo"
                }
            },
        },
        "contexts": {
            "response": {
                "type": "response",
                "data": {
                    "data": {
                        "viewer": {
                            "login": "foo"
                        }
                    }
                }
            }
        }
    });
    let mut data = Event::from_value(payload.into());

    let scrubbing_config = DataScrubbingConfig {
        scrub_data: true,
        scrub_ip_addresses: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let pii_config = to_pii_config(&scrubbing_config).unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut data, &mut scrubber, ProcessingState::root()).unwrap();

    assert_debug_snapshot!(&data);
}
1691
/// Removes every `$string` with `@anything:remove` from an event whose logentry has a
/// message, a formatted text and one string param; the inline snapshot pins `params` to an
/// array holding only the removal remark for that param.
#[test]
fn test_logentry_params_scrubbed() {
    let pii_config: PiiConfig = serde_json::from_str(
        r##"
                {
                    "applications": {
                        "$string": ["@anything:remove"]
                    }
                }
                "##,
    )
    .unwrap();

    let single_param = Annotated::new(Value::String("12345".to_owned()));
    let logentry = LogEntry {
        message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
        formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
        params: Annotated::new(Value::Array(vec![single_param])),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        logentry: Annotated::new(logentry),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let params = get_value!(event.logentry.params!);
    assert_debug_snapshot!(params, @r#"Array(
    [
        Meta {
            remarks: [
                Remark {
                    ty: Removed,
                    rule_id: "@anything:remove",
                    range: None,
                },
            ],
            errors: [],
            original_length: None,
            original_value: None,
        },
    ],
)"#);
}
1738
1739    #[test]
1740    fn test_is_pairlist() {
1741        for (case, expected) in [
1742            (r#"[]"#, false),
1743            (r#"["foo"]"#, false),
1744            (r#"["foo", 123]"#, false),
1745            (r#"[[1, "foo"]]"#, false),
1746            (r#"[[["too_nested", 123]]]"#, false),
1747            (r#"[["foo", "bar"], [1, "foo"]]"#, false),
1748            (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
1749            (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
1750            (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
1751            (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
1752            (r#"[["foo", 123]]"#, true),
1753            (r#"[["foo", "bar"]]"#, true),
1754            (
1755                r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
1756                true,
1757            ),
1758        ] {
1759            let v = Annotated::<Value>::from_json(case).unwrap();
1760            let Annotated(Some(Value::Array(mut a)), _) = v else {
1761                panic!()
1762            };
1763            assert_eq!(is_pairlist(&mut a), expected, "{case}");
1764        }
1765    }
1766
1767    #[test]
1768    fn test_tuple_array_scrubbed_with_path_selector() {
1769        // We expect that both of these configs express the same semantics.
1770        let configs = vec![
1771            // This configuration matches on the authorization element (the 1st element of the array
1772            // represents the key).
1773            r##"
1774                {
1775                    "applications": {
1776                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
1777                    }
1778                }
1779                "##,
1780            // This configuration matches on the 2nd element of the array.
1781            r##"
1782                {
1783                    "applications": {
1784                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
1785                    }
1786                }
1787                "##,
1788        ];
1789
1790        let mut event = Event::from_value(
1791            serde_json::json!(
1792            {
1793              "message": "hi",
1794              "exception": {
1795                "values": [
1796                  {
1797                    "type": "BrokenException",
1798                    "value": "Something failed",
1799                    "stacktrace": {
1800                      "frames": [
1801                        {
1802                            "vars": {
1803                                "headers": [
1804                                    ["authorization", "Bearer abc123"]
1805                                ]
1806                            }
1807                        }
1808                      ]
1809                    }
1810                  }
1811                ]
1812              }
1813            })
1814            .into(),
1815        );
1816
1817        for config in configs {
1818            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
1819            let mut processor = PiiProcessor::new(config.compiled());
1820            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1821
1822            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
1823
1824            allow_duplicates!(assert_debug_snapshot!(vars, @r#"
1825        FrameVars(
1826            {
1827                "headers": Array(
1828                    [
1829                        Array(
1830                            [
1831                                String(
1832                                    "authorization",
1833                                ),
1834                                Annotated(
1835                                    String(
1836                                        "[Filtered]",
1837                                    ),
1838                                    Meta {
1839                                        remarks: [
1840                                            Remark {
1841                                                ty: Substituted,
1842                                                rule_id: "@anything:replace",
1843                                                range: Some(
1844                                                    (
1845                                                        0,
1846                                                        10,
1847                                                    ),
1848                                                ),
1849                                            },
1850                                        ],
1851                                        errors: [],
1852                                        original_length: Some(
1853                                            13,
1854                                        ),
1855                                        original_value: None,
1856                                    },
1857                                ),
1858                            ],
1859                        ),
1860                    ],
1861                ),
1862            },
1863        )
1864        "#));
1865        }
1866    }
1867
1868    #[test]
1869    fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
1870        let config = serde_json::from_str::<PiiConfig>(
1871            r##"
1872                {
1873                    "applications": {
1874                        "$string": ["@password:remove"]
1875                    }
1876                }
1877                "##,
1878        )
1879        .unwrap();
1880
1881        let mut event = Event::from_value(
1882            serde_json::json!(
1883            {
1884              "message": "hi",
1885              "exception": {
1886                "values": [
1887                  {
1888                    "type": "BrokenException",
1889                    "value": "Something failed",
1890                    "stacktrace": {
1891                      "frames": [
1892                        {
1893                            "vars": {
1894                                "headers": [
1895                                    ["authorization", "abc123"]
1896                                ]
1897                            }
1898                        }
1899                      ]
1900                    }
1901                  }
1902                ]
1903              }
1904            })
1905            .into(),
1906        );
1907
1908        let mut processor = PiiProcessor::new(config.compiled());
1909        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1910
1911        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
1912
1913        assert_debug_snapshot!(vars, @r###"
1914        FrameVars(
1915            {
1916                "headers": Array(
1917                    [
1918                        Array(
1919                            [
1920                                String(
1921                                    "authorization",
1922                                ),
1923                                Meta {
1924                                    remarks: [
1925                                        Remark {
1926                                            ty: Removed,
1927                                            rule_id: "@password:remove",
1928                                            range: None,
1929                                        },
1930                                    ],
1931                                    errors: [],
1932                                    original_length: None,
1933                                    original_value: None,
1934                                },
1935                            ],
1936                        ),
1937                    ],
1938                ),
1939            },
1940        )
1941        "###);
1942    }
1943}