relay_pii/
processor.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8    self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9    ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12    AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Controls how scrubbing rules are applied to attributes.
///
/// The mode is read by `PiiProcessor::process_attributes` to decide how each attribute
/// is traversed.
#[derive(Debug, Clone, Copy)]
pub enum AttributeMode {
    /// Treat the attribute as an object and allow referring
    /// to individual fields.
    ///
    /// This is the mode a processor created through `PiiProcessor::new` starts with.
    Object,
    /// Identify the attribute with its value and apply all
    /// rules there directly.
    ValueOnly,
}
32
/// A processor that performs PII stripping.
///
/// The processor only borrows its configuration, so all expensive preparation lives in
/// the referenced `CompiledPiiConfig`.
pub struct PiiProcessor<'a> {
    /// Controls how rules are applied to attributes.
    attribute_mode: AttributeMode,
    /// The compiled configuration whose `applications` are matched against the path of
    /// every processed value.
    compiled_config: &'a CompiledPiiConfig,
}
39
40impl<'a> PiiProcessor<'a> {
41    /// Creates a new processor based on a config.
42    pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43        // this constructor needs to be cheap... a new PiiProcessor is created for each event. Move
44        // any init logic into CompiledPiiConfig::new.
45        PiiProcessor {
46            compiled_config,
47            attribute_mode: AttributeMode::Object,
48        }
49    }
50
51    /// Sets an `AttributeMode` on this processor.
52    pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53        self.attribute_mode = attribute_mode;
54        self
55    }
56
57    fn apply_all_rules(
58        &self,
59        meta: &mut Meta,
60        state: &ProcessingState<'_>,
61        mut value: Option<&mut String>,
62    ) -> ProcessingResult {
63        let pii = state.pii();
64        if pii == Pii::False {
65            return Ok(());
66        }
67
68        for (selector, rules) in self.compiled_config.applications.iter() {
69            if selector.matches_path(&state.path()) {
70                #[allow(clippy::needless_option_as_deref)]
71                for rule in rules {
72                    let reborrowed_value = value.as_deref_mut();
73                    apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74                }
75            }
76        }
77
78        Ok(())
79    }
80}
81
82impl Processor for PiiProcessor<'_> {
83    fn before_process<T: ProcessValue>(
84        &mut self,
85        value: Option<&T>,
86        meta: &mut Meta,
87        state: &ProcessingState<'_>,
88    ) -> ProcessingResult {
89        if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90            // Also apply pii scrubbing to the original value (set by normalization or other processors),
91            // such that we do not leak sensitive data through meta. Deletes `original_value` if an Error
92            // value is returned.
93            if let Some(parent) = state.iter().next() {
94                let path = state.path();
95                let new_state = parent.enter_borrowed(
96                    path.key().unwrap_or(""),
97                    Some(Cow::Borrowed(state.attrs())),
98                    enum_set!(ValueType::String),
99                );
100
101                if self
102                    .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103                    .is_err()
104                {
105                    // `apply_all_rules` returned `DeleteValueHard` or `DeleteValueSoft`, so delete the original as well.
106                    meta.set_original_value(Option::<String>::None);
107                }
108            }
109        }
110
111        // booleans cannot be PII, and strings are handled in process_string
112        if state.value_type().contains(ValueType::Boolean)
113            || state.value_type().contains(ValueType::String)
114        {
115            return Ok(());
116        }
117
118        if value.is_none() {
119            return Ok(());
120        }
121
122        // apply rules based on key/path
123        self.apply_all_rules(meta, state, None)
124    }
125
126    fn process_array<T>(
127        &mut self,
128        array: &mut Array<T>,
129        _meta: &mut Meta,
130        state: &ProcessingState<'_>,
131    ) -> ProcessingResult
132    where
133        T: ProcessValue,
134    {
135        if is_pairlist(array) {
136            for annotated in array {
137                let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139                if let Some(Value::Array(pair)) = mapped.value_mut() {
140                    let mut value = mem::take(&mut pair[1]);
141                    let value_type = ValueType::for_field(&value);
142
143                    if let Some(key_name) = &pair[0].as_str() {
144                        // We enter the key of the first element of the array, since we treat it
145                        // as a pair.
146                        let key_state =
147                            state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148                        // We process the value with a state that "simulates" the first value of the
149                        // array as if it was the key of a dictionary.
150                        process_value(&mut value, self, &key_state)?;
151                    }
152
153                    // Put value back into pair.
154                    pair[1] = value;
155                }
156
157                // Put pair back into array.
158                *annotated = T::from_value(mapped);
159            }
160
161            Ok(())
162        } else {
163            // If we didn't find a pairlist, we can process child values as normal.
164            array.process_child_values(self, state)
165        }
166    }
167
168    fn process_string(
169        &mut self,
170        value: &mut String,
171        meta: &mut Meta,
172        state: &ProcessingState<'_>,
173    ) -> ProcessingResult {
174        if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175            return Ok(());
176        }
177
178        // same as before_process. duplicated here because we can only check for "true",
179        // "false" etc in process_string.
180        self.apply_all_rules(meta, state, Some(value))
181    }
182
183    fn process_native_image_path(
184        &mut self,
185        NativeImagePath(value): &mut NativeImagePath,
186        meta: &mut Meta,
187        state: &ProcessingState<'_>,
188    ) -> ProcessingResult {
189        // In NativeImagePath we must not strip the file's basename because that would break
190        // processing.
191        //
192        // We pop the basename from the end of the string, call process_string and push the
193        // basename again.
194        //
195        // The ranges in Meta should still be right as long as we only pop/push from the end of the
196        // string. If we decide that we need to preserve anything other than suffixes all PII
197        // tooltips/annotations are potentially wrong.
198
199        if let Some(index) = value.rfind(['/', '\\']) {
200            let basename = value.split_off(index);
201            match self.process_string(value, meta, state) {
202                Ok(()) => value.push_str(&basename),
203                Err(
204                    ProcessingAction::DeleteValueHard
205                    | ProcessingAction::DeleteValueWithRemark(_)
206                    | ProcessingAction::DeleteValueSoft,
207                ) => {
208                    basename[1..].clone_into(value);
209                }
210                Err(ProcessingAction::InvalidTransaction(x)) => {
211                    return Err(ProcessingAction::InvalidTransaction(x));
212                }
213            }
214        }
215
216        Ok(())
217    }
218
    /// Processes a `PairList` by delegating to `utils::process_pairlist`.
    ///
    /// The meta of the pair list itself is not used here.
    fn process_pairlist<T: ProcessValue + AsPair>(
        &mut self,
        value: &mut PairList<T>,
        _meta: &mut Meta,
        state: &ProcessingState,
    ) -> ProcessingResult {
        utils::process_pairlist(self, value, state)
    }
227
228    fn process_attributes(
229        &mut self,
230        value: &mut relay_event_schema::protocol::Attributes,
231        _meta: &mut Meta,
232        state: &ProcessingState,
233    ) -> ProcessingResult {
234        match self.attribute_mode {
235            // Treat each attribute as an object and just process them field by field.
236            AttributeMode::Object => value.process_child_values(self, state),
237            // Identify each attribute with its `value` and only process that.
238            AttributeMode::ValueOnly => {
239                for (key, attribute) in value.0.iter_mut() {
240                    let Some(attribute) = attribute.value_mut() else {
241                        continue;
242                    };
243
244                    // We need some manual state management here because we're bypassing all the
245                    // intermediate structures and pointing at the value directly. This essentially
246                    // mimics the attributes and value type that the metastructure derivation would
247                    // produce for the attribute vaue.
248                    let attrs = FieldAttrs::new()
249                        .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
250                    let inner_value = &mut attribute.value.value;
251                    let inner_value_type = ValueType::for_field(inner_value);
252                    let entered =
253                        state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
254
255                    processor::process_value(inner_value, self, &entered)?;
256                    self.process_other(&mut attribute.other, state)?;
257                }
258                Ok(())
259            }
260        }
261    }
262
263    fn process_user(
264        &mut self,
265        user: &mut User,
266        _meta: &mut Meta,
267        state: &ProcessingState<'_>,
268    ) -> ProcessingResult {
269        let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
270
271        // Recurse into the user and does PII processing on fields.
272        user.process_child_values(self, state)?;
273
274        let has_other_fields = user.id.value().is_some()
275            || user.username.value().is_some()
276            || user.email.value().is_some();
277
278        let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
279
280        // If the IP address has become invalid as part of PII processing, we move it into the user
281        // ID. That ensures people can do IP hashing and still have a correct users-affected count.
282        //
283        // Right now both Snuba and EventUser discard unparseable IPs for indexing, and we assume
284        // we want to keep it that way.
285        //
286        // If there are any other fields set that take priority over the IP for uniquely
287        // identifying a user (has_other_fields), we do not want to do anything. The value will be
288        // wiped out in renormalization anyway.
289        if ip_was_valid && !has_other_fields && !ip_is_still_valid {
290            user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
291            user.ip_address.meta_mut().add_remark(Remark::new(
292                RemarkType::Removed,
293                "pii:ip_address".to_owned(),
294            ));
295        }
296
297        Ok(())
298    }
299
300    // Replay PII processor entry point.
301    fn process_replay(
302        &mut self,
303        replay: &mut Replay,
304        _meta: &mut Meta,
305        state: &ProcessingState<'_>,
306    ) -> ProcessingResult {
307        replay.process_child_values(self, state)?;
308        Ok(())
309    }
310}
311
/// Visitor state used to detect whether a value is a `[key, value]` pair array.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited array sits at depth 0 and has exactly two elements.
    is_pair: bool,
    /// Set when a string was visited at index 0 of that array.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Returns true if the processor identified the supplied data as an array composed of
    /// a key (string) and a value.
    fn is_pair_array(&self) -> bool {
        let Self {
            is_pair,
            has_string_key,
        } = *self;

        is_pair && has_string_key
    }
}
325
326impl Processor for PairListProcessor {
327    fn process_array<T>(
328        &mut self,
329        value: &mut Array<T>,
330        _meta: &mut Meta,
331        state: &ProcessingState<'_>,
332    ) -> ProcessingResult
333    where
334        T: ProcessValue,
335    {
336        self.is_pair = state.depth() == 0 && value.len() == 2;
337        if self.is_pair {
338            let key_type = ValueType::for_field(&value[0]);
339            process_value(
340                &mut value[0],
341                self,
342                &state.enter_index(0, state.inner_attrs(), key_type),
343            )?;
344        }
345
346        Ok(())
347    }
348
349    fn process_string(
350        &mut self,
351        _value: &mut String,
352        _meta: &mut Meta,
353        state: &ProcessingState<'_>,
354    ) -> ProcessingResult where {
355        if state.depth() == 1 && state.path().index() == Some(0) {
356            self.has_string_key = true;
357        }
358
359        Ok(())
360    }
361}
362
363fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
364    for element in array.iter_mut() {
365        let mut visitor = PairListProcessor::default();
366        process_value(element, &mut visitor, ProcessingState::root()).ok();
367        if !visitor.is_pair_array() {
368            return false;
369        }
370    }
371
372    !array.is_empty()
373}
374
375/// Scrubs GraphQL variables from the event.
376pub fn scrub_graphql(event: &mut Event) {
377    let mut keys: BTreeSet<&str> = BTreeSet::new();
378
379    let mut is_graphql = false;
380
381    // Collect the variables keys and scrub them out.
382    if let Some(request) = event.request.value_mut()
383        && let Some(Value::Object(data)) = request.data.value_mut()
384    {
385        if let Some(api_target) = request.api_target.value()
386            && api_target.eq_ignore_ascii_case("graphql")
387        {
388            is_graphql = true;
389        }
390
391        if is_graphql
392            && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
393        {
394            for (key, value) in variables.iter_mut() {
395                keys.insert(key);
396                value.set_value(Some(Value::String("[Filtered]".to_owned())));
397            }
398        }
399    }
400
401    if !is_graphql {
402        return;
403    }
404
405    // Scrub PII from the data object if they match the variables keys.
406    if let Some(contexts) = event.contexts.value_mut()
407        && let Some(response) = contexts.get_mut::<ResponseContext>()
408        && let Some(Value::Object(data)) = response.data.value_mut()
409        && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
410    {
411        if !keys.is_empty() {
412            scrub_graphql_data(&keys, graphql_data);
413        } else {
414            // If we don't have the variable keys, we scrub the whole data object
415            // because the query or mutation weren't parameterized.
416            data.remove("data");
417        }
418    }
419}
420
421/// Scrubs values from the data object to `[Filtered]`.
422fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
423    for (key, value) in data.iter_mut() {
424        match value.value_mut() {
425            Some(Value::Object(item_data)) => {
426                scrub_graphql_data(keys, item_data);
427            }
428            _ => {
429                if keys.contains(key.as_str()) {
430                    value.set_value(Some(Value::String("[Filtered]".to_owned())));
431                }
432            }
433        }
434    }
435}
436
437fn apply_rule_to_value(
438    meta: &mut Meta,
439    rule: &RuleRef,
440    key: Option<&str>,
441    mut value: Option<&mut String>,
442) -> ProcessingResult {
443    // The rule might specify to remove or to redact. If redaction is chosen, we need to
444    // chunk up the value, otherwise we need to simply mark the value for deletion.
445    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
446
447    // In case the value is not a string (but a container, bool or number) and the rule matches on
448    // anything, we can only remove the value (not replace, hash, etc).
449    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
450        // The value is a container, @anything on a container can do nothing but delete.
451        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
452        return Err(ProcessingAction::DeleteValueHard);
453    }
454
455    macro_rules! apply_regex {
456        ($regex:expr, $replace_behavior:expr) => {
457            if let Some(ref mut value) = value {
458                processor::process_chunked_value(value, meta, |chunks| {
459                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
460                });
461            }
462        };
463    }
464
465    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
466        match pattern_type {
467            PatternType::KeyValue => {
468                if regex.is_match(key.unwrap_or("")) {
469                    if value.is_some() && should_redact_chunks {
470                        // If we're given a string value here, redact the value like we would with
471                        // @anything.
472                        apply_regex!(&ANYTHING_REGEX, replace_behavior);
473                    } else {
474                        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
475                        return Err(ProcessingAction::DeleteValueHard);
476                    }
477                } else {
478                    // If we did not redact using the key, we will redact the entire value if the key
479                    // appears in it.
480                    apply_regex!(regex, replace_behavior);
481                }
482            }
483            PatternType::Value => {
484                apply_regex!(regex, replace_behavior);
485            }
486        }
487    }
488
489    Ok(())
490}
491
492fn apply_regex_to_chunks<'a>(
493    chunks: Vec<Chunk<'a>>,
494    rule: &RuleRef,
495    regex: &Regex,
496    replace_behavior: ReplaceBehavior,
497) -> Vec<Chunk<'a>> {
498    // NB: This function allocates the entire string and all chunks a second time. This means it
499    // cannot reuse chunks and reallocates them. Ideally, we would be able to run the regex directly
500    // on the chunks, but the `regex` crate does not support that.
501
502    let mut search_string = String::new();
503    let mut has_text = false;
504    for chunk in &chunks {
505        match chunk {
506            Chunk::Text { text } => {
507                has_text = true;
508                search_string.push_str(&text.replace('\x00', ""));
509            }
510            Chunk::Redaction { .. } => search_string.push('\x00'),
511        }
512    }
513
514    if !has_text {
515        // Nothing to replace.
516        return chunks;
517    }
518
519    // Early exit if this regex does not match and return the original chunks.
520    let mut captures_iter = regex.captures_iter(&search_string).peekable();
521    if captures_iter.peek().is_none() {
522        return chunks;
523    }
524
525    let mut replacement_chunks = vec![];
526    for chunk in chunks {
527        if let Chunk::Redaction { .. } = chunk {
528            replacement_chunks.push(chunk);
529        }
530    }
531    replacement_chunks.reverse();
532
533    fn process_text<'a>(
534        text: &str,
535        rv: &mut Vec<Chunk<'a>>,
536        replacement_chunks: &mut Vec<Chunk<'a>>,
537    ) {
538        if text.is_empty() {
539            return;
540        }
541
542        static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
543        let regex = NULL_SPLIT_RE.get_or_init(|| {
544            #[allow(clippy::trivial_regex)]
545            Regex::new("\x00").unwrap()
546        });
547
548        let mut pos = 0;
549        for piece in regex.find_iter(text) {
550            rv.push(Chunk::Text {
551                text: Cow::Owned(text[pos..piece.start()].to_string()),
552            });
553            rv.push(replacement_chunks.pop().unwrap());
554            pos = piece.end();
555        }
556
557        rv.push(Chunk::Text {
558            text: Cow::Owned(text[pos..].to_string()),
559        });
560    }
561
562    let mut pos = 0;
563    let mut rv = Vec::with_capacity(replacement_chunks.len());
564
565    match replace_behavior {
566        ReplaceBehavior::Groups(ref groups) => {
567            for m in captures_iter {
568                for (idx, g) in m.iter().enumerate() {
569                    if let Some(g) = g
570                        && groups.contains(&(idx as u8))
571                    {
572                        process_text(
573                            &search_string[pos..g.start()],
574                            &mut rv,
575                            &mut replacement_chunks,
576                        );
577                        insert_replacement_chunks(rule, g.as_str(), &mut rv);
578                        pos = g.end();
579                    }
580                }
581            }
582            process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
583            debug_assert!(replacement_chunks.is_empty());
584        }
585        ReplaceBehavior::Value => {
586            // We only want to replace a string value, and the replacement chunk for that is
587            // inserted by insert_replacement_chunks. Adding chunks from replacement_chunks
588            // results in the incorrect behavior of a total of more chunks than the input.
589            insert_replacement_chunks(rule, &search_string, &mut rv);
590        }
591    }
592    rv
593}
594
595fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
596    match &rule.redaction {
597        Redaction::Default | Redaction::Remove => {
598            output.push(Chunk::Redaction {
599                text: Cow::Borrowed(""),
600                rule_id: Cow::Owned(rule.origin.to_string()),
601                ty: RemarkType::Removed,
602            });
603        }
604        Redaction::Mask => {
605            let buf = vec!['*'; text.chars().count()];
606
607            output.push(Chunk::Redaction {
608                ty: RemarkType::Masked,
609                rule_id: Cow::Owned(rule.origin.to_string()),
610                text: buf.into_iter().collect(),
611            })
612        }
613        Redaction::Hash => {
614            output.push(Chunk::Redaction {
615                ty: RemarkType::Pseudonymized,
616                rule_id: Cow::Owned(rule.origin.to_string()),
617                text: Cow::Owned(utils::hash_value(text.as_bytes())),
618            });
619        }
620        Redaction::Replace(replace) => {
621            output.push(Chunk::Redaction {
622                ty: RemarkType::Substituted,
623                rule_id: Cow::Owned(rule.origin.to_string()),
624                text: Cow::Owned(replace.text.clone()),
625            });
626        }
627        Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
628    }
629}
630
631#[cfg(test)]
632mod tests {
633    use insta::{allow_duplicates, assert_debug_snapshot};
634    use relay_event_schema::processor::process_value;
635    use relay_event_schema::protocol::{
636        Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
637        NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
638    };
639    use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
640    use serde_json::json;
641
642    use super::*;
643    use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
644
645    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
646        use crate::convert::to_pii_config as to_pii_config_impl;
647        let rv = to_pii_config_impl(datascrubbing_config).unwrap();
648        if let Some(ref config) = rv {
649            let roundtrip: PiiConfig =
650                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
651            assert_eq!(&roundtrip, config);
652        }
653        rv
654    }
655
656    #[test]
657    fn test_scrub_original_value() {
658        let mut data = Event::from_value(
659            json!({
660                "user": {
661                    "username": "hey  man 73.133.27.120", // should be stripped despite not being "known ip field"
662                    "ip_address": "is this an ip address? 73.133.27.120", //  <--------
663                },
664                "hpkp":"invalid data my ip address is  74.133.27.120 and my credit card number is  4571234567890111 ",
665            })
666            .into(),
667        );
668
669        let scrubbing_config = DataScrubbingConfig {
670            scrub_data: true,
671            scrub_ip_addresses: true,
672            scrub_defaults: true,
673            ..Default::default()
674        };
675
676        let pii_config = to_pii_config(&scrubbing_config).unwrap();
677        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
678
679        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
680
681        assert_debug_snapshot!(&data);
682    }
683
684    #[test]
685    fn test_sentry_user() {
686        let mut data = Event::from_value(
687            json!({
688                "user": {
689                    "ip_address": "73.133.27.120",
690                    "sentry_user": "ip:73.133.27.120",
691                },
692            })
693            .into(),
694        );
695
696        let scrubbing_config = DataScrubbingConfig {
697            scrub_data: true,
698            scrub_ip_addresses: true,
699            scrub_defaults: true,
700            ..Default::default()
701        };
702
703        let pii_config = to_pii_config(&scrubbing_config).unwrap();
704        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
705
706        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
707
708        assert_debug_snapshot!(&data);
709    }
710
711    #[test]
712    fn test_basic_stripping() {
713        let config = serde_json::from_str::<PiiConfig>(
714            r#"
715            {
716                "rules": {
717                    "remove_bad_headers": {
718                        "type": "redact_pair",
719                        "keyPattern": "(?i)cookie|secret[-_]?key"
720                    }
721                },
722                "applications": {
723                    "$string": ["@ip"],
724                    "$object.**": ["remove_bad_headers"]
725                }
726            }
727            "#,
728        )
729        .unwrap();
730
731        let mut event = Annotated::new(Event {
732            logentry: Annotated::new(LogEntry {
733                formatted: Annotated::new("Hello world!".to_owned().into()),
734                ..Default::default()
735            }),
736            request: Annotated::new(Request {
737                env: {
738                    let mut rv = Object::new();
739                    rv.insert(
740                        "SECRET_KEY".to_owned(),
741                        Annotated::new(Value::String("134141231231231231231312".into())),
742                    );
743                    Annotated::new(rv)
744                },
745                headers: {
746                    let rv = vec![
747                        Annotated::new((
748                            Annotated::new("Cookie".to_owned().into()),
749                            Annotated::new("super secret".to_owned().into()),
750                        )),
751                        Annotated::new((
752                            Annotated::new("X-Forwarded-For".to_owned().into()),
753                            Annotated::new("127.0.0.1".to_owned().into()),
754                        )),
755                    ];
756                    Annotated::new(Headers(PairList(rv)))
757                },
758                ..Default::default()
759            }),
760            tags: Annotated::new(Tags(
761                vec![Annotated::new(TagEntry(
762                    Annotated::new("forwarded_for".to_owned()),
763                    Annotated::new("127.0.0.1".to_owned()),
764                ))]
765                .into(),
766            )),
767            ..Default::default()
768        });
769
770        let mut processor = PiiProcessor::new(config.compiled());
771        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
772        assert_annotated_snapshot!(event);
773    }
774
775    #[test]
776    fn test_redact_containers() {
777        let config = serde_json::from_str::<PiiConfig>(
778            r#"
779            {
780                "applications": {
781                    "$object": ["@anything"]
782                }
783            }
784            "#,
785        )
786        .unwrap();
787
788        let mut event = Annotated::new(Event {
789            extra: {
790                let mut map = Object::new();
791                map.insert(
792                    "foo".to_owned(),
793                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
794                );
795                Annotated::new(map)
796            },
797            ..Default::default()
798        });
799
800        let mut processor = PiiProcessor::new(config.compiled());
801        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
802        assert_annotated_snapshot!(event);
803    }
804
805    #[test]
806    fn test_redact_custom_pattern() {
807        let config = serde_json::from_str::<PiiConfig>(
808            r#"
809            {
810                "applications": {
811                    "$string": ["myrule"]
812                },
813                "rules": {
814                    "myrule": {
815                        "type": "pattern",
816                        "pattern": "foo",
817                        "redaction": {
818                            "method": "replace",
819                            "text": "asd"
820                        }
821                    }
822                }
823            }
824            "#,
825        )
826        .unwrap();
827
828        let mut event = Annotated::new(Event {
829            extra: {
830                let mut map = Object::new();
831                map.insert(
832                    "myvalue".to_owned(),
833                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
834                );
835                Annotated::new(map)
836            },
837            ..Default::default()
838        });
839
840        let mut processor = PiiProcessor::new(config.compiled());
841        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
842        assert_annotated_snapshot!(event);
843    }
844
845    #[test]
846    fn test_redact_custom_negative_pattern() {
847        let config = serde_json::from_str::<PiiConfig>(
848            r#"
849            {
850                "applications": {
851                    "$string": ["myrule"]
852                },
853                "rules": {
854                    "myrule": {
855                        "type": "pattern",
856                        "pattern": "the good string|.*OK.*|(.*)",
857                        "replaceGroups": [1],
858                        "redaction": {
859                            "method": "mask"
860                        }
861                    }
862                }
863            }
864            "#,
865        )
866        .unwrap();
867
868        let mut event = Annotated::<Event>::from_json(
869            r#"{
870            "extra": {
871                "1": "the good string",
872                "2": "a bad string",
873                "3": "another OK string",
874                "4": "another bad one"
875            }
876        }"#,
877        )
878        .unwrap();
879
880        let mut processor = PiiProcessor::new(config.compiled());
881        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
882        assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
883        {
884          "1": "the good string",
885          "2": "************",
886          "3": "another OK string",
887          "4": "***************",
888          "_meta": {
889            "2": {
890              "": {
891                "rem": [
892                  [
893                    "myrule",
894                    "m",
895                    0,
896                    12
897                  ]
898                ],
899                "len": 12
900              }
901            },
902            "4": {
903              "": {
904                "rem": [
905                  [
906                    "myrule",
907                    "m",
908                    0,
909                    15
910                  ]
911                ],
912                "len": 15
913              }
914            }
915          }
916        }
917        "#);
918    }
919
920    #[test]
921    fn test_no_field_upsert() {
922        let config = serde_json::from_str::<PiiConfig>(
923            r#"
924            {
925                "applications": {
926                    "**": ["@anything:remove"]
927                }
928            }
929            "#,
930        )
931        .unwrap();
932
933        let mut event = Annotated::new(Event {
934            extra: {
935                let mut map = Object::new();
936                map.insert(
937                    "myvalue".to_owned(),
938                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
939                );
940                Annotated::new(map)
941            },
942            ..Default::default()
943        });
944
945        let mut processor = PiiProcessor::new(config.compiled());
946        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
947        assert_annotated_snapshot!(event);
948    }
949
950    #[test]
951    fn test_anything_hash_on_string() {
952        let config = serde_json::from_str::<PiiConfig>(
953            r#"
954            {
955                "applications": {
956                    "$string": ["@anything:hash"]
957                }
958            }
959            "#,
960        )
961        .unwrap();
962
963        let mut event = Annotated::new(Event {
964            extra: {
965                let mut map = Object::new();
966                map.insert(
967                    "myvalue".to_owned(),
968                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
969                );
970                Annotated::new(map)
971            },
972            ..Default::default()
973        });
974
975        let mut processor = PiiProcessor::new(config.compiled());
976        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
977        assert_annotated_snapshot!(event);
978    }
979
980    #[test]
981    fn test_anything_hash_on_container() {
982        let config = serde_json::from_str::<PiiConfig>(
983            r#"
984            {
985                "applications": {
986                    "$object": ["@anything:hash"]
987                }
988            }
989            "#,
990        )
991        .unwrap();
992
993        let mut event = Annotated::new(Event {
994            extra: {
995                let mut map = Object::new();
996                map.insert(
997                    "myvalue".to_owned(),
998                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
999                );
1000                Annotated::new(map)
1001            },
1002            ..Default::default()
1003        });
1004
1005        let mut processor = PiiProcessor::new(config.compiled());
1006        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1007        assert_annotated_snapshot!(event);
1008    }
1009
1010    #[test]
1011    fn test_ignore_user_agent_ip_scrubbing() {
1012        let mut data = Event::from_value(
1013            json!({
1014                "request": {
1015                    "headers": [
1016                        ["User-Agent", "127.0.0.1"],
1017                        ["X-Client-Ip", "10.0.0.1"]
1018                    ]
1019                },
1020            })
1021            .into(),
1022        );
1023
1024        let scrubbing_config = DataScrubbingConfig {
1025            scrub_data: true,
1026            scrub_ip_addresses: true,
1027            scrub_defaults: true,
1028            ..Default::default()
1029        };
1030
1031        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1032        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1033
1034        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1035
1036        assert_annotated_snapshot!(&data);
1037    }
1038
1039    #[test]
1040    fn test_remove_debugmeta_path() {
1041        let config = serde_json::from_str::<PiiConfig>(
1042            r#"
1043            {
1044                "applications": {
1045                    "debug_meta.images.*.code_file": ["@anything:remove"],
1046                    "debug_meta.images.*.debug_file": ["@anything:remove"]
1047                }
1048            }
1049            "#,
1050        )
1051        .unwrap();
1052
1053        let mut event = Annotated::new(Event {
1054            debug_meta: Annotated::new(DebugMeta {
1055                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1056                    NativeDebugImage {
1057                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1058                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1059                        debug_id: Annotated::new(
1060                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1061                        ),
1062                        debug_file: Annotated::new("wntdll.pdb".into()),
1063                        debug_checksum: Annotated::empty(),
1064                        arch: Annotated::new("arm64".to_owned()),
1065                        image_addr: Annotated::new(Addr(0)),
1066                        image_size: Annotated::new(4096),
1067                        image_vmaddr: Annotated::new(Addr(32768)),
1068                        other: {
1069                            let mut map = Object::new();
1070                            map.insert(
1071                                "other".to_owned(),
1072                                Annotated::new(Value::String("value".to_owned())),
1073                            );
1074                            map
1075                        },
1076                    },
1077                )))]),
1078                ..Default::default()
1079            }),
1080            ..Default::default()
1081        });
1082
1083        let mut processor = PiiProcessor::new(config.compiled());
1084        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1085        assert_annotated_snapshot!(event);
1086    }
1087
1088    #[test]
1089    fn test_replace_debugmeta_path() {
1090        let config = serde_json::from_str::<PiiConfig>(
1091            r#"
1092            {
1093                "applications": {
1094                    "debug_meta.images.*.code_file": ["@anything:replace"],
1095                    "debug_meta.images.*.debug_file": ["@anything:replace"]
1096                }
1097            }
1098            "#,
1099        )
1100        .unwrap();
1101
1102        let mut event = Annotated::new(Event {
1103            debug_meta: Annotated::new(DebugMeta {
1104                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1105                    NativeDebugImage {
1106                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1107                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1108                        debug_id: Annotated::new(
1109                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1110                        ),
1111                        debug_file: Annotated::new("wntdll.pdb".into()),
1112                        debug_checksum: Annotated::empty(),
1113                        arch: Annotated::new("arm64".to_owned()),
1114                        image_addr: Annotated::new(Addr(0)),
1115                        image_size: Annotated::new(4096),
1116                        image_vmaddr: Annotated::new(Addr(32768)),
1117                        other: {
1118                            let mut map = Object::new();
1119                            map.insert(
1120                                "other".to_owned(),
1121                                Annotated::new(Value::String("value".to_owned())),
1122                            );
1123                            map
1124                        },
1125                    },
1126                )))]),
1127                ..Default::default()
1128            }),
1129            ..Default::default()
1130        });
1131
1132        let mut processor = PiiProcessor::new(config.compiled());
1133        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1134        assert_annotated_snapshot!(event);
1135    }
1136
1137    #[test]
1138    fn test_hash_debugmeta_path() {
1139        let config = serde_json::from_str::<PiiConfig>(
1140            r#"
1141            {
1142                "applications": {
1143                    "debug_meta.images.*.code_file": ["@anything:hash"],
1144                    "debug_meta.images.*.debug_file": ["@anything:hash"]
1145                }
1146            }
1147            "#,
1148        )
1149        .unwrap();
1150
1151        let mut event = Annotated::new(Event {
1152            debug_meta: Annotated::new(DebugMeta {
1153                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1154                    NativeDebugImage {
1155                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1156                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1157                        debug_id: Annotated::new(
1158                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1159                        ),
1160                        debug_file: Annotated::new("wntdll.pdb".into()),
1161                        debug_checksum: Annotated::empty(),
1162                        arch: Annotated::new("arm64".to_owned()),
1163                        image_addr: Annotated::new(Addr(0)),
1164                        image_size: Annotated::new(4096),
1165                        image_vmaddr: Annotated::new(Addr(32768)),
1166                        other: {
1167                            let mut map = Object::new();
1168                            map.insert(
1169                                "other".to_owned(),
1170                                Annotated::new(Value::String("value".to_owned())),
1171                            );
1172                            map
1173                        },
1174                    },
1175                )))]),
1176                ..Default::default()
1177            }),
1178            ..Default::default()
1179        });
1180
1181        let mut processor = PiiProcessor::new(config.compiled());
1182        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1183        assert_annotated_snapshot!(event);
1184    }
1185
1186    #[test]
1187    fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1188        let config = serde_json::from_str::<PiiConfig>(
1189            r#"
1190            {
1191                "applications": {
1192                    "$string": ["@anything:remove"],
1193                    "**": ["@anything:remove"],
1194                    "debug_meta.**": ["@anything:remove"],
1195                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1196                }
1197            }
1198            "#,
1199        )
1200        .unwrap();
1201
1202        let mut event = Annotated::new(Event {
1203            debug_meta: Annotated::new(DebugMeta {
1204                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1205                    NativeDebugImage {
1206                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1207                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1208                        debug_id: Annotated::new(
1209                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1210                        ),
1211                        debug_file: Annotated::new("wntdll.pdb".into()),
1212                        debug_checksum: Annotated::empty(),
1213                        arch: Annotated::new("arm64".to_owned()),
1214                        image_addr: Annotated::new(Addr(0)),
1215                        image_size: Annotated::new(4096),
1216                        image_vmaddr: Annotated::new(Addr(32768)),
1217                        other: {
1218                            let mut map = Object::new();
1219                            map.insert(
1220                                "other".to_owned(),
1221                                Annotated::new(Value::String("value".to_owned())),
1222                            );
1223                            map
1224                        },
1225                    },
1226                )))]),
1227                ..Default::default()
1228            }),
1229            ..Default::default()
1230        });
1231
1232        let mut processor = PiiProcessor::new(config.compiled());
1233        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1234        assert_annotated_snapshot!(event);
1235    }
1236
1237    #[test]
1238    fn test_quoted_keys() {
1239        let config = serde_json::from_str::<PiiConfig>(
1240            r#"
1241            {
1242                "applications": {
1243                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1244                }
1245            }
1246            "#,
1247        )
1248        .unwrap();
1249
1250        let mut event = Annotated::new(Event {
1251            extra: {
1252                let mut map = Object::new();
1253                map.insert(
1254                    "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1255                    Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1256                );
1257                map.insert(
1258                    "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1259                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1260                );
1261                Annotated::new(map)
1262            },
1263            ..Default::default()
1264        });
1265
1266        let mut processor = PiiProcessor::new(config.compiled());
1267        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1268        assert_annotated_snapshot!(event);
1269    }
1270
1271    #[test]
1272    fn test_logentry_value_types() {
1273        // Assert that logentry.formatted is addressable as $string, $message and $logentry.formatted.
1274        for formatted_selector in &[
1275            "$logentry.formatted",
1276            "$message",
1277            "$logentry.formatted && $message",
1278            "$string",
1279        ] {
1280            let config = serde_json::from_str::<PiiConfig>(&format!(
1281                r##"
1282                {{
1283                    "applications": {{
1284                        "{formatted_selector}": ["@anything:remove"]
1285                    }}
1286                }}
1287                "##
1288            ))
1289            .unwrap();
1290
1291            let mut event = Annotated::new(Event {
1292                logentry: Annotated::new(LogEntry {
1293                    formatted: Annotated::new("Hello world!".to_owned().into()),
1294                    ..Default::default()
1295                }),
1296                ..Default::default()
1297            });
1298
1299            let mut processor = PiiProcessor::new(config.compiled());
1300            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1301            assert!(
1302                event
1303                    .value()
1304                    .unwrap()
1305                    .logentry
1306                    .value()
1307                    .unwrap()
1308                    .formatted
1309                    .value()
1310                    .is_none()
1311            );
1312        }
1313    }
1314
1315    #[test]
1316    fn test_logentry_formatted_never_fully_filtered() {
1317        // Test that logentry.formatted gets smart PII scrubbing via to_pii_config
1318        // and is never completely filtered even with aggressive PII rules
1319        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1320            scrub_data: true,
1321            scrub_defaults: true,
1322            scrub_ip_addresses: true,
1323            ..Default::default()
1324        })
1325        .unwrap()
1326        .unwrap();
1327
1328        let mut event = Annotated::new(Event {
1329            logentry: Annotated::new(LogEntry {
1330                formatted: Annotated::new(
1331                    "User john.doe@company.com failed login with card 4111-1111-1111-1111"
1332                        .to_owned()
1333                        .into(),
1334                ),
1335                ..Default::default()
1336            }),
1337            ..Default::default()
1338        });
1339
1340        let mut processor = PiiProcessor::new(config.compiled());
1341        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1342        assert_annotated_snapshot!(event, @r#"
1343        {
1344          "logentry": {
1345            "formatted": "User [email] failed login with card [creditcard]"
1346          },
1347          "_meta": {
1348            "logentry": {
1349              "formatted": {
1350                "": {
1351                  "rem": [
1352                    [
1353                      "@email:replace",
1354                      "s",
1355                      5,
1356                      12
1357                    ],
1358                    [
1359                      "@creditcard:replace",
1360                      "s",
1361                      36,
1362                      48
1363                    ]
1364                  ],
1365                  "len": 68
1366                }
1367              }
1368            }
1369          }
1370        }
1371        "#);
1372    }
1373
1374    #[test]
1375    fn test_logentry_formatted_bearer_token_scrubbing() {
1376        // Test that bearer tokens are properly scrubbed in logentry.formatted
1377        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1378            scrub_data: true,
1379            scrub_defaults: true,
1380            ..Default::default()
1381        })
1382        .unwrap()
1383        .unwrap();
1384
1385        let mut event = Annotated::new(Event {
1386            logentry: Annotated::new(LogEntry {
1387                formatted: Annotated::new(
1388                    "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1389                        .to_owned()
1390                        .into(),
1391                ),
1392                ..Default::default()
1393            }),
1394            ..Default::default()
1395        });
1396
1397        let mut processor = PiiProcessor::new(config.compiled());
1398        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1399        assert_annotated_snapshot!(event, @r#"
1400        {
1401          "logentry": {
1402            "formatted": "API request failed with Bearer [token] and other data"
1403          },
1404          "_meta": {
1405            "logentry": {
1406              "formatted": {
1407                "": {
1408                  "rem": [
1409                    [
1410                      "@bearer:replace",
1411                      "s",
1412                      24,
1413                      38
1414                    ]
1415                  ],
1416                  "len": 63
1417                }
1418              }
1419            }
1420          }
1421        }
1422        "#);
1423    }
1424
1425    #[test]
1426    fn test_logentry_formatted_password_word_not_scrubbed() {
1427        let config = PiiConfig::default();
1428        let mut event = Annotated::new(Event {
1429            logentry: Annotated::new(LogEntry {
1430                formatted: Annotated::new(
1431                    "User password is secret123 for authentication"
1432                        .to_owned()
1433                        .into(),
1434                ),
1435                ..Default::default()
1436            }),
1437            ..Default::default()
1438        });
1439
1440        let mut processor = PiiProcessor::new(config.compiled());
1441        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1442        assert_annotated_snapshot!(event, @r#"
1443        {
1444          "logentry": {
1445            "formatted": "User password is secret123 for authentication"
1446          }
1447        }
1448        "#);
1449    }
1450
1451    #[test]
1452    fn test_ip_address_hashing() {
1453        let config = serde_json::from_str::<PiiConfig>(
1454            r#"
1455            {
1456                "applications": {
1457                    "$user.ip_address": ["@ip:hash"]
1458                }
1459            }
1460            "#,
1461        )
1462        .unwrap();
1463
1464        let mut event = Annotated::new(Event {
1465            user: Annotated::new(User {
1466                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1467                ..Default::default()
1468            }),
1469            ..Default::default()
1470        });
1471
1472        let mut processor = PiiProcessor::new(config.compiled());
1473        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1474
1475        let user = event.value().unwrap().user.value().unwrap();
1476
1477        assert!(user.ip_address.value().is_none());
1478
1479        assert_eq!(
1480            user.id.value().unwrap().as_str(),
1481            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1482        );
1483    }
1484
1485    #[test]
1486    fn test_ip_address_hashing_does_not_overwrite_id() {
1487        let config = serde_json::from_str::<PiiConfig>(
1488            r#"
1489            {
1490                "applications": {
1491                    "$user.ip_address": ["@ip:hash"]
1492                }
1493            }
1494            "#,
1495        )
1496        .unwrap();
1497
1498        let mut event = Annotated::new(Event {
1499            user: Annotated::new(User {
1500                id: Annotated::new("123".to_owned().into()),
1501                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1502                ..Default::default()
1503            }),
1504            ..Default::default()
1505        });
1506
1507        let mut processor = PiiProcessor::new(config.compiled());
1508        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1509
1510        let user = event.value().unwrap().user.value().unwrap();
1511
1512        // This will get wiped out in renormalization though
1513        assert_eq!(
1514            user.ip_address.value().unwrap().as_str(),
1515            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1516        );
1517
1518        assert_eq!(user.id.value().unwrap().as_str(), "123");
1519    }
1520
1521    #[test]
1522    fn test_replace_replaced_text() {
1523        let chunks = vec![Chunk::Redaction {
1524            text: "[ip]".into(),
1525            rule_id: "@ip".into(),
1526            ty: RemarkType::Substituted,
1527        }];
1528        let rule = RuleRef {
1529            id: "@ip:replace".into(),
1530            origin: "@ip".into(),
1531            ty: RuleType::Ip,
1532            redaction: Redaction::Replace(ReplaceRedaction {
1533                text: "[ip]".into(),
1534            }),
1535        };
1536        let res = apply_regex_to_chunks(
1537            chunks.clone(),
1538            &rule,
1539            &Regex::new(r#".*"#).unwrap(),
1540            ReplaceBehavior::Value,
1541        );
1542        assert_eq!(chunks, res);
1543    }
1544
1545    #[test]
1546    fn test_replace_replaced_text_anything() {
1547        let chunks = vec![Chunk::Redaction {
1548            text: "[Filtered]".into(),
1549            rule_id: "@password:filter".into(),
1550            ty: RemarkType::Substituted,
1551        }];
1552        let rule = RuleRef {
1553            id: "@anything:filter".into(),
1554            origin: "@anything:filter".into(),
1555            ty: RuleType::Anything,
1556            redaction: Redaction::Replace(ReplaceRedaction {
1557                text: "[Filtered]".into(),
1558            }),
1559        };
1560        let res = apply_regex_to_chunks(
1561            chunks.clone(),
1562            &rule,
1563            &Regex::new(r#".*"#).unwrap(),
1564            ReplaceBehavior::Groups(smallvec::smallvec![0]),
1565        );
1566        assert_eq!(chunks, res);
1567    }
1568
1569    #[test]
1570    fn test_trace_route_params_scrubbed() {
1571        let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1572            r#"
1573            {
1574                "type": "trace",
1575                "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1576                "span_id": "fa90fdead5f74052",
1577                "data": {
1578                    "previousRoute": {
1579                        "params": {
1580                            "password": "test"
1581                        }
1582                    }
1583                }
1584            }
1585            "#,
1586        )
1587        .unwrap();
1588
1589        let ds_config = DataScrubbingConfig {
1590            scrub_data: true,
1591            scrub_defaults: true,
1592            ..Default::default()
1593        };
1594        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1595        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1596
1597        process_value(
1598            &mut trace_context,
1599            &mut pii_processor,
1600            ProcessingState::root(),
1601        )
1602        .unwrap();
1603        assert_annotated_snapshot!(trace_context);
1604    }
1605
1606    #[test]
1607    fn test_scrub_span_data_http_not_scrubbed() {
1608        let mut span: Annotated<Span> = Annotated::from_json(
1609            r#"{
1610                "data": {
1611                    "http": {
1612                        "query": "dance=true"
1613                    }
1614                }
1615            }"#,
1616        )
1617        .unwrap();
1618
1619        let ds_config = DataScrubbingConfig {
1620            scrub_data: true,
1621            scrub_defaults: true,
1622            ..Default::default()
1623        };
1624        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1625        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1626
1627        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1628        assert_annotated_snapshot!(span);
1629    }
1630
1631    #[test]
1632    fn test_scrub_span_data_http_strings_are_scrubbed() {
1633        let mut span: Annotated<Span> = Annotated::from_json(
1634            r#"{
1635                "data": {
1636                    "http": {
1637                        "query": "ccnumber=5105105105105100&process_id=123",
1638                        "fragment": "ccnumber=5105105105105100,process_id=123"
1639                    }
1640                }
1641            }"#,
1642        )
1643        .unwrap();
1644
1645        let ds_config = DataScrubbingConfig {
1646            scrub_data: true,
1647            scrub_defaults: true,
1648            ..Default::default()
1649        };
1650        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1651        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1652
1653        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1654        assert_annotated_snapshot!(span);
1655    }
1656
1657    #[test]
1658    fn test_scrub_span_data_http_objects_are_scrubbed() {
1659        let mut span: Annotated<Span> = Annotated::from_json(
1660            r#"{
1661                "data": {
1662                    "http": {
1663                        "query": {
1664                            "ccnumber": "5105105105105100",
1665                            "process_id": "123"
1666                        },
1667                        "fragment": {
1668                            "ccnumber": "5105105105105100",
1669                            "process_id": "123"
1670                        }
1671                    }
1672                }
1673            }"#,
1674        )
1675        .unwrap();
1676
1677        let ds_config = DataScrubbingConfig {
1678            scrub_data: true,
1679            scrub_defaults: true,
1680            ..Default::default()
1681        };
1682        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1683        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1684
1685        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1686        assert_annotated_snapshot!(span);
1687    }
1688
1689    #[test]
1690    fn test_scrub_span_data_untyped_props_are_scrubbed() {
1691        let mut span: Annotated<Span> = Annotated::from_json(
1692            r#"{
1693                "data": {
1694                    "untyped": "ccnumber=5105105105105100",
1695                    "more_untyped": {
1696                        "typed": "no",
1697                        "scrubbed": "yes",
1698                        "ccnumber": "5105105105105100"
1699                    }
1700                }
1701            }"#,
1702        )
1703        .unwrap();
1704
1705        let ds_config = DataScrubbingConfig {
1706            scrub_data: true,
1707            scrub_defaults: true,
1708            ..Default::default()
1709        };
1710        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1711        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1712
1713        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1714        assert_annotated_snapshot!(span);
1715    }
1716
1717    #[test]
1718    fn test_span_data_pii() {
1719        let mut span = Span::from_value(
1720            json!({
1721                "data": {
1722                    "code.filepath": "src/sentry/api/authentication.py",
1723                }
1724            })
1725            .into(),
1726        );
1727
1728        let ds_config = DataScrubbingConfig {
1729            scrub_data: true,
1730            scrub_defaults: true,
1731            ..Default::default()
1732        };
1733        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1734
1735        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1736        processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1737        assert_eq!(
1738            get_value!(span.data.code_filepath!).as_str(),
1739            Some("src/sentry/api/authentication.py")
1740        );
1741    }
1742
1743    #[test]
1744    fn test_csp_source_file_pii() {
1745        let mut event = Event::from_value(
1746            json!({
1747                "csp": {
1748                    "source_file": "authentication.js",
1749                }
1750            })
1751            .into(),
1752        );
1753
1754        let config = serde_json::from_str::<PiiConfig>(
1755            r#"
1756            {
1757                "applications": {
1758                    "csp.source_file": ["@anything:filter"]
1759                }
1760            }
1761            "#,
1762        )
1763        .unwrap();
1764
1765        let mut pii_processor = PiiProcessor::new(config.compiled());
1766        processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1767        assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1768    }
1769
1770    #[test]
1771    fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1772        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1773            r#"{
1774                "data": {
1775                    "http": {
1776                        "query": "dance=true"
1777                    }
1778                }
1779            }"#,
1780        )
1781        .unwrap();
1782
1783        let ds_config = DataScrubbingConfig {
1784            scrub_data: true,
1785            scrub_defaults: true,
1786            ..Default::default()
1787        };
1788        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1789        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1790        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1791        assert_annotated_snapshot!(breadcrumb);
1792    }
1793
1794    #[test]
1795    fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1796        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1797            r#"{
1798                "data": {
1799                    "http": {
1800                        "query": "ccnumber=5105105105105100&process_id=123",
1801                        "fragment": "ccnumber=5105105105105100,process_id=123"
1802                    }
1803                }
1804            }"#,
1805        )
1806        .unwrap();
1807
1808        let ds_config = DataScrubbingConfig {
1809            scrub_data: true,
1810            scrub_defaults: true,
1811            ..Default::default()
1812        };
1813        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1814        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1815        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1816        assert_annotated_snapshot!(breadcrumb);
1817    }
1818
1819    #[test]
1820    fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1821        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1822            r#"{
1823                "data": {
1824                    "http": {
1825                        "query": {
1826                            "ccnumber": "5105105105105100",
1827                            "process_id": "123"
1828                        },
1829                        "fragment": {
1830                            "ccnumber": "5105105105105100",
1831                            "process_id": "123"
1832                        }
1833                    }
1834                }
1835            }"#,
1836        )
1837        .unwrap();
1838
1839        let ds_config = DataScrubbingConfig {
1840            scrub_data: true,
1841            scrub_defaults: true,
1842            ..Default::default()
1843        };
1844        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1845        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1846
1847        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1848        assert_annotated_snapshot!(breadcrumb);
1849    }
1850
1851    #[test]
1852    fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1853        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1854            r#"{
1855                "data": {
1856                    "untyped": "ccnumber=5105105105105100",
1857                    "more_untyped": {
1858                        "typed": "no",
1859                        "scrubbed": "yes",
1860                        "ccnumber": "5105105105105100"
1861                    }
1862                }
1863            }"#,
1864        )
1865        .unwrap();
1866
1867        let ds_config = DataScrubbingConfig {
1868            scrub_data: true,
1869            scrub_defaults: true,
1870            ..Default::default()
1871        };
1872        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1873        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1874        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1875        assert_annotated_snapshot!(breadcrumb);
1876    }
1877
1878    #[test]
1879    fn test_scrub_graphql_response_data_with_variables() {
1880        let mut data = Event::from_value(
1881            json!({
1882              "request": {
1883                "data": {
1884                  "query": "{\n  viewer {\n    login\n  }\n}",
1885                  "variables": {
1886                    "login": "foo"
1887                  }
1888                },
1889                "api_target": "graphql"
1890              },
1891              "contexts": {
1892                "response": {
1893                  "type": "response",
1894                  "data": {
1895                    "data": {
1896                      "viewer": {
1897                        "login": "foo"
1898                      }
1899                    }
1900                  }
1901                }
1902              }
1903            })
1904            .into(),
1905        );
1906
1907        scrub_graphql(data.value_mut().as_mut().unwrap());
1908
1909        assert_debug_snapshot!(&data);
1910    }
1911
1912    #[test]
1913    fn test_scrub_graphql_response_data_without_variables() {
1914        let mut data = Event::from_value(
1915            json!({
1916              "request": {
1917                "data": {
1918                  "query": "{\n  viewer {\n    login\n  }\n}"
1919                },
1920                "api_target": "graphql"
1921              },
1922              "contexts": {
1923                "response": {
1924                  "type": "response",
1925                  "data": {
1926                    "data": {
1927                      "viewer": {
1928                        "login": "foo"
1929                      }
1930                    }
1931                  }
1932                }
1933              }
1934            })
1935            .into(),
1936        );
1937
1938        scrub_graphql(data.value_mut().as_mut().unwrap());
1939        assert_debug_snapshot!(&data);
1940    }
1941
1942    #[test]
1943    fn test_does_not_scrub_if_no_graphql() {
1944        let mut data = Event::from_value(
1945            json!({
1946              "request": {
1947                "data": {
1948                  "query": "{\n  viewer {\n    login\n  }\n}",
1949                  "variables": {
1950                    "login": "foo"
1951                  }
1952                },
1953              },
1954              "contexts": {
1955                "response": {
1956                  "type": "response",
1957                  "data": {
1958                    "data": {
1959                      "viewer": {
1960                        "login": "foo"
1961                      }
1962                    }
1963                  }
1964                }
1965              }
1966            })
1967            .into(),
1968        );
1969
1970        let scrubbing_config = DataScrubbingConfig {
1971            scrub_data: true,
1972            scrub_ip_addresses: true,
1973            scrub_defaults: true,
1974            ..Default::default()
1975        };
1976
1977        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1978        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1979
1980        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1981
1982        assert_debug_snapshot!(&data);
1983    }
1984
1985    #[test]
1986    fn test_logentry_params_scrubbed() {
1987        let config = serde_json::from_str::<PiiConfig>(
1988            r##"
1989                {
1990                    "applications": {
1991                        "$string": ["@anything:remove"]
1992                    }
1993                }
1994                "##,
1995        )
1996        .unwrap();
1997
1998        let mut event = Annotated::new(Event {
1999            logentry: Annotated::new(LogEntry {
2000                message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
2001                formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
2002                params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
2003                    "12345".to_owned(),
2004                ))])),
2005                ..Default::default()
2006            }),
2007            ..Default::default()
2008        });
2009
2010        let mut processor = PiiProcessor::new(config.compiled());
2011        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2012
2013        let params = get_value!(event.logentry.params!);
2014        assert_debug_snapshot!(params, @r###"
2015        Array(
2016            [
2017                Meta {
2018                    remarks: [
2019                        Remark {
2020                            ty: Removed,
2021                            rule_id: "@anything:remove",
2022                            range: None,
2023                        },
2024                    ],
2025                    errors: [],
2026                    original_length: None,
2027                    original_value: None,
2028                },
2029            ],
2030        )
2031        "###);
2032    }
2033
2034    #[test]
2035    fn test_is_pairlist() {
2036        for (case, expected) in [
2037            (r#"[]"#, false),
2038            (r#"["foo"]"#, false),
2039            (r#"["foo", 123]"#, false),
2040            (r#"[[1, "foo"]]"#, false),
2041            (r#"[[["too_nested", 123]]]"#, false),
2042            (r#"[["foo", "bar"], [1, "foo"]]"#, false),
2043            (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
2044            (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
2045            (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
2046            (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
2047            (r#"[["foo", 123]]"#, true),
2048            (r#"[["foo", "bar"]]"#, true),
2049            (
2050                r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
2051                true,
2052            ),
2053        ] {
2054            let v = Annotated::<Value>::from_json(case).unwrap();
2055            let Annotated(Some(Value::Array(mut a)), _) = v else {
2056                panic!()
2057            };
2058            assert_eq!(is_pairlist(&mut a), expected, "{case}");
2059        }
2060    }
2061
2062    #[test]
2063    fn test_tuple_array_scrubbed_with_path_selector() {
2064        // We expect that both of these configs express the same semantics.
2065        let configs = vec![
2066            // This configuration matches on the authorization element (the 1st element of the array
2067            // represents the key).
2068            r##"
2069                {
2070                    "applications": {
2071                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
2072                    }
2073                }
2074                "##,
2075            // This configuration matches on the 2nd element of the array.
2076            r##"
2077                {
2078                    "applications": {
2079                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
2080                    }
2081                }
2082                "##,
2083        ];
2084
2085        let mut event = Event::from_value(
2086            serde_json::json!(
2087            {
2088              "message": "hi",
2089              "exception": {
2090                "values": [
2091                  {
2092                    "type": "BrokenException",
2093                    "value": "Something failed",
2094                    "stacktrace": {
2095                      "frames": [
2096                        {
2097                            "vars": {
2098                                "headers": [
2099                                    ["authorization", "Bearer abc123"]
2100                                ]
2101                            }
2102                        }
2103                      ]
2104                    }
2105                  }
2106                ]
2107              }
2108            })
2109            .into(),
2110        );
2111
2112        for config in configs {
2113            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
2114            let mut processor = PiiProcessor::new(config.compiled());
2115            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2116
2117            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2118
2119            allow_duplicates!(assert_debug_snapshot!(vars, @r###"
2120                              FrameVars(
2121                                  {
2122                                      "headers": Array(
2123                                          [
2124                                              Array(
2125                                                  [
2126                                                      String(
2127                                                          "authorization",
2128                                                      ),
2129                                                      Annotated(
2130                                                          String(
2131                                                              "[Filtered]",
2132                                                          ),
2133                                                          Meta {
2134                                                              remarks: [
2135                                                                  Remark {
2136                                                                      ty: Substituted,
2137                                                                      rule_id: "@anything:replace",
2138                                                                      range: Some(
2139                                                                          (
2140                                                                              0,
2141                                                                              10,
2142                                                                          ),
2143                                                                      ),
2144                                                                  },
2145                                                              ],
2146                                                              errors: [],
2147                                                              original_length: Some(
2148                                                                  13,
2149                                                              ),
2150                                                              original_value: None,
2151                                                          },
2152                                                      ),
2153                                                  ],
2154                                              ),
2155                                          ],
2156                                      ),
2157                                  },
2158                              )
2159                              "###));
2160        }
2161    }
2162
2163    #[test]
2164    fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2165        let config = serde_json::from_str::<PiiConfig>(
2166            r##"
2167                {
2168                    "applications": {
2169                        "$string": ["@password:remove"]
2170                    }
2171                }
2172                "##,
2173        )
2174        .unwrap();
2175
2176        let mut event = Event::from_value(
2177            serde_json::json!(
2178            {
2179              "message": "hi",
2180              "exception": {
2181                "values": [
2182                  {
2183                    "type": "BrokenException",
2184                    "value": "Something failed",
2185                    "stacktrace": {
2186                      "frames": [
2187                        {
2188                            "vars": {
2189                                "headers": [
2190                                    ["authorization", "abc123"]
2191                                ]
2192                            }
2193                        }
2194                      ]
2195                    }
2196                  }
2197                ]
2198              }
2199            })
2200            .into(),
2201        );
2202
2203        let mut processor = PiiProcessor::new(config.compiled());
2204        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2205
2206        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2207
2208        assert_debug_snapshot!(vars, @r###"
2209        FrameVars(
2210            {
2211                "headers": Array(
2212                    [
2213                        Array(
2214                            [
2215                                String(
2216                                    "authorization",
2217                                ),
2218                                Meta {
2219                                    remarks: [
2220                                        Remark {
2221                                            ty: Removed,
2222                                            rule_id: "@password:remove",
2223                                            range: None,
2224                                        },
2225                                    ],
2226                                    errors: [],
2227                                    original_length: None,
2228                                    original_value: None,
2229                                },
2230                            ],
2231                        ),
2232                    ],
2233                ),
2234            },
2235        )
2236        "###);
2237    }
2238}