relay_pii/
processor.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8    self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9    ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12    AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Controls how scrubbing rules are applied to attributes.
///
/// The enum is a plain value type: besides being `Copy`, it implements `PartialEq`/`Eq` so
/// that two modes can be compared directly (for example in assertions).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttributeMode {
    /// Treat the attribute as an object and allow referring
    /// to individual fields.
    Object,
    /// Identify the attribute with its value and apply all
    /// rules there directly.
    ValueOnly,
}
32
33/// A processor that performs PII stripping.
34pub struct PiiProcessor<'a> {
35    /// Controls how rules are applied to attributes.
36    attribute_mode: AttributeMode,
37    compiled_config: &'a CompiledPiiConfig,
38}
39
40impl<'a> PiiProcessor<'a> {
41    /// Creates a new processor based on a config.
42    pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43        // this constructor needs to be cheap... a new PiiProcessor is created for each event. Move
44        // any init logic into CompiledPiiConfig::new.
45        PiiProcessor {
46            compiled_config,
47            attribute_mode: AttributeMode::Object,
48        }
49    }
50
51    /// Sets an `AttributeMode` on this processor.
52    pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53        self.attribute_mode = attribute_mode;
54        self
55    }
56
57    fn apply_all_rules(
58        &self,
59        meta: &mut Meta,
60        state: &ProcessingState<'_>,
61        mut value: Option<&mut String>,
62    ) -> ProcessingResult {
63        let pii = state.pii();
64        if pii == Pii::False {
65            return Ok(());
66        }
67
68        for (selector, rules) in self.compiled_config.applications.iter() {
69            if selector.matches_path(&state.path()) {
70                #[allow(clippy::needless_option_as_deref)]
71                for rule in rules {
72                    let reborrowed_value = value.as_deref_mut();
73                    apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74                }
75            }
76        }
77
78        Ok(())
79    }
80}
81
82impl Processor for PiiProcessor<'_> {
83    fn before_process<T: ProcessValue>(
84        &mut self,
85        value: Option<&T>,
86        meta: &mut Meta,
87        state: &ProcessingState<'_>,
88    ) -> ProcessingResult {
89        if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90            // Also apply pii scrubbing to the original value (set by normalization or other processors),
91            // such that we do not leak sensitive data through meta. Deletes `original_value` if an Error
92            // value is returned.
93            if let Some(parent) = state.iter().next() {
94                let path = state.path();
95                let new_state = parent.enter_borrowed(
96                    path.key().unwrap_or(""),
97                    Some(Cow::Borrowed(state.attrs())),
98                    enum_set!(ValueType::String),
99                );
100
101                if self
102                    .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103                    .is_err()
104                {
105                    // `apply_all_rules` returned `DeleteValueHard` or `DeleteValueSoft`, so delete the original as well.
106                    meta.set_original_value(Option::<String>::None);
107                }
108            }
109        }
110
111        // booleans cannot be PII, and strings are handled in process_string
112        if state.value_type().contains(ValueType::Boolean)
113            || state.value_type().contains(ValueType::String)
114        {
115            return Ok(());
116        }
117
118        if value.is_none() {
119            return Ok(());
120        }
121
122        // apply rules based on key/path
123        self.apply_all_rules(meta, state, None)
124    }
125
126    fn process_array<T>(
127        &mut self,
128        array: &mut Array<T>,
129        _meta: &mut Meta,
130        state: &ProcessingState<'_>,
131    ) -> ProcessingResult
132    where
133        T: ProcessValue,
134    {
135        if is_pairlist(array) {
136            for annotated in array {
137                let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139                if let Some(Value::Array(pair)) = mapped.value_mut() {
140                    let mut value = mem::take(&mut pair[1]);
141                    let value_type = ValueType::for_field(&value);
142
143                    if let Some(key_name) = &pair[0].as_str() {
144                        // We enter the key of the first element of the array, since we treat it
145                        // as a pair.
146                        let key_state =
147                            state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148                        // We process the value with a state that "simulates" the first value of the
149                        // array as if it was the key of a dictionary.
150                        process_value(&mut value, self, &key_state)?;
151                    }
152
153                    // Put value back into pair.
154                    pair[1] = value;
155                }
156
157                // Put pair back into array.
158                *annotated = T::from_value(mapped);
159            }
160
161            Ok(())
162        } else {
163            // If we didn't find a pairlist, we can process child values as normal.
164            array.process_child_values(self, state)
165        }
166    }
167
168    fn process_string(
169        &mut self,
170        value: &mut String,
171        meta: &mut Meta,
172        state: &ProcessingState<'_>,
173    ) -> ProcessingResult {
174        if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175            return Ok(());
176        }
177
178        // same as before_process. duplicated here because we can only check for "true",
179        // "false" etc in process_string.
180        self.apply_all_rules(meta, state, Some(value))
181    }
182
183    fn process_native_image_path(
184        &mut self,
185        NativeImagePath(value): &mut NativeImagePath,
186        meta: &mut Meta,
187        state: &ProcessingState<'_>,
188    ) -> ProcessingResult {
189        // In NativeImagePath we must not strip the file's basename because that would break
190        // processing.
191        //
192        // We pop the basename from the end of the string, call process_string and push the
193        // basename again.
194        //
195        // The ranges in Meta should still be right as long as we only pop/push from the end of the
196        // string. If we decide that we need to preserve anything other than suffixes all PII
197        // tooltips/annotations are potentially wrong.
198
199        if let Some(index) = value.rfind(['/', '\\']) {
200            let basename = value.split_off(index);
201            match self.process_string(value, meta, state) {
202                Ok(()) => value.push_str(&basename),
203                Err(ProcessingAction::DeleteValueHard) | Err(ProcessingAction::DeleteValueSoft) => {
204                    basename[1..].clone_into(value);
205                }
206                Err(ProcessingAction::InvalidTransaction(x)) => {
207                    return Err(ProcessingAction::InvalidTransaction(x));
208                }
209            }
210        }
211
212        Ok(())
213    }
214
    /// Processes a pair list by delegating to `utils::process_pairlist`, passing this
    /// processor along. The meta of the pair list itself is not used.
    fn process_pairlist<T: ProcessValue + AsPair>(
        &mut self,
        value: &mut PairList<T>,
        _meta: &mut Meta,
        state: &ProcessingState,
    ) -> ProcessingResult {
        utils::process_pairlist(self, value, state)
    }
223
224    fn process_attributes(
225        &mut self,
226        value: &mut relay_event_schema::protocol::Attributes,
227        _meta: &mut Meta,
228        state: &ProcessingState,
229    ) -> ProcessingResult {
230        match self.attribute_mode {
231            // Treat each attribute as an object and just process them field by field.
232            AttributeMode::Object => value.process_child_values(self, state),
233            // Identify each attribute with its `value` and only process that.
234            AttributeMode::ValueOnly => {
235                for (key, attribute) in value.0.iter_mut() {
236                    let Some(attribute) = attribute.value_mut() else {
237                        continue;
238                    };
239
240                    // We need some manual state management here because we're bypassing all the
241                    // intermediate structures and pointing at the value directly. This essentially
242                    // mimics the attributes and value type that the metastructure derivation would
243                    // produce for the attribute vaue.
244                    let attrs = FieldAttrs::new()
245                        .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
246                    let inner_value = &mut attribute.value.value;
247                    let inner_value_type = ValueType::for_field(inner_value);
248                    let entered =
249                        state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
250
251                    processor::process_value(inner_value, self, &entered)?;
252                    self.process_other(&mut attribute.other, state)?;
253                }
254                Ok(())
255            }
256        }
257    }
258
259    fn process_user(
260        &mut self,
261        user: &mut User,
262        _meta: &mut Meta,
263        state: &ProcessingState<'_>,
264    ) -> ProcessingResult {
265        let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
266
267        // Recurse into the user and does PII processing on fields.
268        user.process_child_values(self, state)?;
269
270        let has_other_fields = user.id.value().is_some()
271            || user.username.value().is_some()
272            || user.email.value().is_some();
273
274        let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
275
276        // If the IP address has become invalid as part of PII processing, we move it into the user
277        // ID. That ensures people can do IP hashing and still have a correct users-affected count.
278        //
279        // Right now both Snuba and EventUser discard unparseable IPs for indexing, and we assume
280        // we want to keep it that way.
281        //
282        // If there are any other fields set that take priority over the IP for uniquely
283        // identifying a user (has_other_fields), we do not want to do anything. The value will be
284        // wiped out in renormalization anyway.
285        if ip_was_valid && !has_other_fields && !ip_is_still_valid {
286            user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
287            user.ip_address.meta_mut().add_remark(Remark::new(
288                RemarkType::Removed,
289                "pii:ip_address".to_owned(),
290            ));
291        }
292
293        Ok(())
294    }
295
296    // Replay PII processor entry point.
297    fn process_replay(
298        &mut self,
299        replay: &mut Replay,
300        _meta: &mut Meta,
301        state: &ProcessingState<'_>,
302    ) -> ProcessingResult {
303        replay.process_child_values(self, state)?;
304        Ok(())
305    }
306}
307
/// Visitor that records whether a processed value looks like a `[key, value]` pair.
///
/// Both flags start out as `false`.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the first element of the visited array was seen as a string.
    has_string_key: bool,
    /// Set when the visited top-level array has exactly two elements.
    is_pair: bool,
}
313
314impl PairListProcessor {
315    /// Returns true if the processor identified the supplied data as an array composed of
316    /// a key (string) and a value.
317    fn is_pair_array(&self) -> bool {
318        self.is_pair && self.has_string_key
319    }
320}
321
322impl Processor for PairListProcessor {
323    fn process_array<T>(
324        &mut self,
325        value: &mut Array<T>,
326        _meta: &mut Meta,
327        state: &ProcessingState<'_>,
328    ) -> ProcessingResult
329    where
330        T: ProcessValue,
331    {
332        self.is_pair = state.depth() == 0 && value.len() == 2;
333        if self.is_pair {
334            let key_type = ValueType::for_field(&value[0]);
335            process_value(
336                &mut value[0],
337                self,
338                &state.enter_index(0, state.inner_attrs(), key_type),
339            )?;
340        }
341
342        Ok(())
343    }
344
345    fn process_string(
346        &mut self,
347        _value: &mut String,
348        _meta: &mut Meta,
349        state: &ProcessingState<'_>,
350    ) -> ProcessingResult where {
351        if state.depth() == 1 && state.path().index() == Some(0) {
352            self.has_string_key = true;
353        }
354
355        Ok(())
356    }
357}
358
359fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
360    for element in array.iter_mut() {
361        let mut visitor = PairListProcessor::default();
362        process_value(element, &mut visitor, ProcessingState::root()).ok();
363        if !visitor.is_pair_array() {
364            return false;
365        }
366    }
367
368    !array.is_empty()
369}
370
371/// Scrubs GraphQL variables from the event.
372pub fn scrub_graphql(event: &mut Event) {
373    let mut keys: BTreeSet<&str> = BTreeSet::new();
374
375    let mut is_graphql = false;
376
377    // Collect the variables keys and scrub them out.
378    if let Some(request) = event.request.value_mut()
379        && let Some(Value::Object(data)) = request.data.value_mut()
380    {
381        if let Some(api_target) = request.api_target.value()
382            && api_target.eq_ignore_ascii_case("graphql")
383        {
384            is_graphql = true;
385        }
386
387        if is_graphql
388            && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
389        {
390            for (key, value) in variables.iter_mut() {
391                keys.insert(key);
392                value.set_value(Some(Value::String("[Filtered]".to_owned())));
393            }
394        }
395    }
396
397    if !is_graphql {
398        return;
399    }
400
401    // Scrub PII from the data object if they match the variables keys.
402    if let Some(contexts) = event.contexts.value_mut()
403        && let Some(response) = contexts.get_mut::<ResponseContext>()
404        && let Some(Value::Object(data)) = response.data.value_mut()
405        && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
406    {
407        if !keys.is_empty() {
408            scrub_graphql_data(&keys, graphql_data);
409        } else {
410            // If we don't have the variable keys, we scrub the whole data object
411            // because the query or mutation weren't parameterized.
412            data.remove("data");
413        }
414    }
415}
416
417/// Scrubs values from the data object to `[Filtered]`.
418fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
419    for (key, value) in data.iter_mut() {
420        match value.value_mut() {
421            Some(Value::Object(item_data)) => {
422                scrub_graphql_data(keys, item_data);
423            }
424            _ => {
425                if keys.contains(key.as_str()) {
426                    value.set_value(Some(Value::String("[Filtered]".to_owned())));
427                }
428            }
429        }
430    }
431}
432
433fn apply_rule_to_value(
434    meta: &mut Meta,
435    rule: &RuleRef,
436    key: Option<&str>,
437    mut value: Option<&mut String>,
438) -> ProcessingResult {
439    // The rule might specify to remove or to redact. If redaction is chosen, we need to
440    // chunk up the value, otherwise we need to simply mark the value for deletion.
441    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
442
443    // In case the value is not a string (but a container, bool or number) and the rule matches on
444    // anything, we can only remove the value (not replace, hash, etc).
445    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
446        // The value is a container, @anything on a container can do nothing but delete.
447        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
448        return Err(ProcessingAction::DeleteValueHard);
449    }
450
451    macro_rules! apply_regex {
452        ($regex:expr, $replace_behavior:expr) => {
453            if let Some(ref mut value) = value {
454                processor::process_chunked_value(value, meta, |chunks| {
455                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
456                });
457            }
458        };
459    }
460
461    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
462        match pattern_type {
463            PatternType::KeyValue => {
464                if regex.is_match(key.unwrap_or("")) {
465                    if value.is_some() && should_redact_chunks {
466                        // If we're given a string value here, redact the value like we would with
467                        // @anything.
468                        apply_regex!(&ANYTHING_REGEX, replace_behavior);
469                    } else {
470                        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
471                        return Err(ProcessingAction::DeleteValueHard);
472                    }
473                } else {
474                    // If we did not redact using the key, we will redact the entire value if the key
475                    // appears in it.
476                    apply_regex!(regex, replace_behavior);
477                }
478            }
479            PatternType::Value => {
480                apply_regex!(regex, replace_behavior);
481            }
482        }
483    }
484
485    Ok(())
486}
487
488fn apply_regex_to_chunks<'a>(
489    chunks: Vec<Chunk<'a>>,
490    rule: &RuleRef,
491    regex: &Regex,
492    replace_behavior: ReplaceBehavior,
493) -> Vec<Chunk<'a>> {
494    // NB: This function allocates the entire string and all chunks a second time. This means it
495    // cannot reuse chunks and reallocates them. Ideally, we would be able to run the regex directly
496    // on the chunks, but the `regex` crate does not support that.
497
498    let mut search_string = String::new();
499    let mut has_text = false;
500    for chunk in &chunks {
501        match chunk {
502            Chunk::Text { text } => {
503                has_text = true;
504                search_string.push_str(&text.replace('\x00', ""));
505            }
506            Chunk::Redaction { .. } => search_string.push('\x00'),
507        }
508    }
509
510    if !has_text {
511        // Nothing to replace.
512        return chunks;
513    }
514
515    // Early exit if this regex does not match and return the original chunks.
516    let mut captures_iter = regex.captures_iter(&search_string).peekable();
517    if captures_iter.peek().is_none() {
518        return chunks;
519    }
520
521    let mut replacement_chunks = vec![];
522    for chunk in chunks {
523        if let Chunk::Redaction { .. } = chunk {
524            replacement_chunks.push(chunk);
525        }
526    }
527    replacement_chunks.reverse();
528
529    fn process_text<'a>(
530        text: &str,
531        rv: &mut Vec<Chunk<'a>>,
532        replacement_chunks: &mut Vec<Chunk<'a>>,
533    ) {
534        if text.is_empty() {
535            return;
536        }
537
538        static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
539        let regex = NULL_SPLIT_RE.get_or_init(|| {
540            #[allow(clippy::trivial_regex)]
541            Regex::new("\x00").unwrap()
542        });
543
544        let mut pos = 0;
545        for piece in regex.find_iter(text) {
546            rv.push(Chunk::Text {
547                text: Cow::Owned(text[pos..piece.start()].to_string()),
548            });
549            rv.push(replacement_chunks.pop().unwrap());
550            pos = piece.end();
551        }
552
553        rv.push(Chunk::Text {
554            text: Cow::Owned(text[pos..].to_string()),
555        });
556    }
557
558    let mut pos = 0;
559    let mut rv = Vec::with_capacity(replacement_chunks.len());
560
561    match replace_behavior {
562        ReplaceBehavior::Groups(ref groups) => {
563            for m in captures_iter {
564                for (idx, g) in m.iter().enumerate() {
565                    if let Some(g) = g
566                        && groups.contains(&(idx as u8))
567                    {
568                        process_text(
569                            &search_string[pos..g.start()],
570                            &mut rv,
571                            &mut replacement_chunks,
572                        );
573                        insert_replacement_chunks(rule, g.as_str(), &mut rv);
574                        pos = g.end();
575                    }
576                }
577            }
578            process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
579            debug_assert!(replacement_chunks.is_empty());
580        }
581        ReplaceBehavior::Value => {
582            // We only want to replace a string value, and the replacement chunk for that is
583            // inserted by insert_replacement_chunks. Adding chunks from replacement_chunks
584            // results in the incorrect behavior of a total of more chunks than the input.
585            insert_replacement_chunks(rule, &search_string, &mut rv);
586        }
587    }
588    rv
589}
590
591fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
592    match &rule.redaction {
593        Redaction::Default | Redaction::Remove => {
594            output.push(Chunk::Redaction {
595                text: Cow::Borrowed(""),
596                rule_id: Cow::Owned(rule.origin.to_string()),
597                ty: RemarkType::Removed,
598            });
599        }
600        Redaction::Mask => {
601            let buf = vec!['*'; text.chars().count()];
602
603            output.push(Chunk::Redaction {
604                ty: RemarkType::Masked,
605                rule_id: Cow::Owned(rule.origin.to_string()),
606                text: buf.into_iter().collect(),
607            })
608        }
609        Redaction::Hash => {
610            output.push(Chunk::Redaction {
611                ty: RemarkType::Pseudonymized,
612                rule_id: Cow::Owned(rule.origin.to_string()),
613                text: Cow::Owned(utils::hash_value(text.as_bytes())),
614            });
615        }
616        Redaction::Replace(replace) => {
617            output.push(Chunk::Redaction {
618                ty: RemarkType::Substituted,
619                rule_id: Cow::Owned(rule.origin.to_string()),
620                text: Cow::Owned(replace.text.clone()),
621            });
622        }
623        Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
624    }
625}
626
627#[cfg(test)]
628mod tests {
629    use insta::{allow_duplicates, assert_debug_snapshot};
630    use relay_event_schema::processor::process_value;
631    use relay_event_schema::protocol::{
632        Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
633        NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
634    };
635    use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
636    use serde_json::json;
637
638    use super::*;
639    use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
640
641    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
642        use crate::convert::to_pii_config as to_pii_config_impl;
643        let rv = to_pii_config_impl(datascrubbing_config).unwrap();
644        if let Some(ref config) = rv {
645            let roundtrip: PiiConfig =
646                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
647            assert_eq!(&roundtrip, config);
648        }
649        rv
650    }
651
652    #[test]
653    fn test_scrub_original_value() {
654        let mut data = Event::from_value(
655            json!({
656                "user": {
657                    "username": "hey  man 73.133.27.120", // should be stripped despite not being "known ip field"
658                    "ip_address": "is this an ip address? 73.133.27.120", //  <--------
659                },
660                "hpkp":"invalid data my ip address is  74.133.27.120 and my credit card number is  4571234567890111 ",
661            })
662            .into(),
663        );
664
665        let scrubbing_config = DataScrubbingConfig {
666            scrub_data: true,
667            scrub_ip_addresses: true,
668            scrub_defaults: true,
669            ..Default::default()
670        };
671
672        let pii_config = to_pii_config(&scrubbing_config).unwrap();
673        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
674
675        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
676
677        assert_debug_snapshot!(&data);
678    }
679
680    #[test]
681    fn test_sentry_user() {
682        let mut data = Event::from_value(
683            json!({
684                "user": {
685                    "ip_address": "73.133.27.120",
686                    "sentry_user": "ip:73.133.27.120",
687                },
688            })
689            .into(),
690        );
691
692        let scrubbing_config = DataScrubbingConfig {
693            scrub_data: true,
694            scrub_ip_addresses: true,
695            scrub_defaults: true,
696            ..Default::default()
697        };
698
699        let pii_config = to_pii_config(&scrubbing_config).unwrap();
700        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
701
702        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
703
704        assert_debug_snapshot!(&data);
705    }
706
707    #[test]
708    fn test_basic_stripping() {
709        let config = serde_json::from_str::<PiiConfig>(
710            r#"
711            {
712                "rules": {
713                    "remove_bad_headers": {
714                        "type": "redact_pair",
715                        "keyPattern": "(?i)cookie|secret[-_]?key"
716                    }
717                },
718                "applications": {
719                    "$string": ["@ip"],
720                    "$object.**": ["remove_bad_headers"]
721                }
722            }
723            "#,
724        )
725        .unwrap();
726
727        let mut event = Annotated::new(Event {
728            logentry: Annotated::new(LogEntry {
729                formatted: Annotated::new("Hello world!".to_owned().into()),
730                ..Default::default()
731            }),
732            request: Annotated::new(Request {
733                env: {
734                    let mut rv = Object::new();
735                    rv.insert(
736                        "SECRET_KEY".to_owned(),
737                        Annotated::new(Value::String("134141231231231231231312".into())),
738                    );
739                    Annotated::new(rv)
740                },
741                headers: {
742                    let rv = vec![
743                        Annotated::new((
744                            Annotated::new("Cookie".to_owned().into()),
745                            Annotated::new("super secret".to_owned().into()),
746                        )),
747                        Annotated::new((
748                            Annotated::new("X-Forwarded-For".to_owned().into()),
749                            Annotated::new("127.0.0.1".to_owned().into()),
750                        )),
751                    ];
752                    Annotated::new(Headers(PairList(rv)))
753                },
754                ..Default::default()
755            }),
756            tags: Annotated::new(Tags(
757                vec![Annotated::new(TagEntry(
758                    Annotated::new("forwarded_for".to_owned()),
759                    Annotated::new("127.0.0.1".to_owned()),
760                ))]
761                .into(),
762            )),
763            ..Default::default()
764        });
765
766        let mut processor = PiiProcessor::new(config.compiled());
767        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
768        assert_annotated_snapshot!(event);
769    }
770
771    #[test]
772    fn test_redact_containers() {
773        let config = serde_json::from_str::<PiiConfig>(
774            r#"
775            {
776                "applications": {
777                    "$object": ["@anything"]
778                }
779            }
780            "#,
781        )
782        .unwrap();
783
784        let mut event = Annotated::new(Event {
785            extra: {
786                let mut map = Object::new();
787                map.insert(
788                    "foo".to_owned(),
789                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
790                );
791                Annotated::new(map)
792            },
793            ..Default::default()
794        });
795
796        let mut processor = PiiProcessor::new(config.compiled());
797        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
798        assert_annotated_snapshot!(event);
799    }
800
801    #[test]
802    fn test_redact_custom_pattern() {
803        let config = serde_json::from_str::<PiiConfig>(
804            r#"
805            {
806                "applications": {
807                    "$string": ["myrule"]
808                },
809                "rules": {
810                    "myrule": {
811                        "type": "pattern",
812                        "pattern": "foo",
813                        "redaction": {
814                            "method": "replace",
815                            "text": "asd"
816                        }
817                    }
818                }
819            }
820            "#,
821        )
822        .unwrap();
823
824        let mut event = Annotated::new(Event {
825            extra: {
826                let mut map = Object::new();
827                map.insert(
828                    "myvalue".to_owned(),
829                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
830                );
831                Annotated::new(map)
832            },
833            ..Default::default()
834        });
835
836        let mut processor = PiiProcessor::new(config.compiled());
837        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
838        assert_annotated_snapshot!(event);
839    }
840
841    #[test]
842    fn test_no_field_upsert() {
843        let config = serde_json::from_str::<PiiConfig>(
844            r#"
845            {
846                "applications": {
847                    "**": ["@anything:remove"]
848                }
849            }
850            "#,
851        )
852        .unwrap();
853
854        let mut event = Annotated::new(Event {
855            extra: {
856                let mut map = Object::new();
857                map.insert(
858                    "myvalue".to_owned(),
859                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
860                );
861                Annotated::new(map)
862            },
863            ..Default::default()
864        });
865
866        let mut processor = PiiProcessor::new(config.compiled());
867        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
868        assert_annotated_snapshot!(event);
869    }
870
871    #[test]
872    fn test_anything_hash_on_string() {
873        let config = serde_json::from_str::<PiiConfig>(
874            r#"
875            {
876                "applications": {
877                    "$string": ["@anything:hash"]
878                }
879            }
880            "#,
881        )
882        .unwrap();
883
884        let mut event = Annotated::new(Event {
885            extra: {
886                let mut map = Object::new();
887                map.insert(
888                    "myvalue".to_owned(),
889                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
890                );
891                Annotated::new(map)
892            },
893            ..Default::default()
894        });
895
896        let mut processor = PiiProcessor::new(config.compiled());
897        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
898        assert_annotated_snapshot!(event);
899    }
900
901    #[test]
902    fn test_anything_hash_on_container() {
903        let config = serde_json::from_str::<PiiConfig>(
904            r#"
905            {
906                "applications": {
907                    "$object": ["@anything:hash"]
908                }
909            }
910            "#,
911        )
912        .unwrap();
913
914        let mut event = Annotated::new(Event {
915            extra: {
916                let mut map = Object::new();
917                map.insert(
918                    "myvalue".to_owned(),
919                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
920                );
921                Annotated::new(map)
922            },
923            ..Default::default()
924        });
925
926        let mut processor = PiiProcessor::new(config.compiled());
927        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
928        assert_annotated_snapshot!(event);
929    }
930
931    #[test]
932    fn test_ignore_user_agent_ip_scrubbing() {
933        let mut data = Event::from_value(
934            json!({
935                "request": {
936                    "headers": [
937                        ["User-Agent", "127.0.0.1"],
938                        ["X-Client-Ip", "10.0.0.1"]
939                    ]
940                },
941            })
942            .into(),
943        );
944
945        let scrubbing_config = DataScrubbingConfig {
946            scrub_data: true,
947            scrub_ip_addresses: true,
948            scrub_defaults: true,
949            ..Default::default()
950        };
951
952        let pii_config = to_pii_config(&scrubbing_config).unwrap();
953        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
954
955        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
956
957        assert_annotated_snapshot!(&data);
958    }
959
960    #[test]
961    fn test_remove_debugmeta_path() {
962        let config = serde_json::from_str::<PiiConfig>(
963            r#"
964            {
965                "applications": {
966                    "debug_meta.images.*.code_file": ["@anything:remove"],
967                    "debug_meta.images.*.debug_file": ["@anything:remove"]
968                }
969            }
970            "#,
971        )
972        .unwrap();
973
974        let mut event = Annotated::new(Event {
975            debug_meta: Annotated::new(DebugMeta {
976                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
977                    NativeDebugImage {
978                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
979                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
980                        debug_id: Annotated::new(
981                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
982                        ),
983                        debug_file: Annotated::new("wntdll.pdb".into()),
984                        debug_checksum: Annotated::empty(),
985                        arch: Annotated::new("arm64".to_owned()),
986                        image_addr: Annotated::new(Addr(0)),
987                        image_size: Annotated::new(4096),
988                        image_vmaddr: Annotated::new(Addr(32768)),
989                        other: {
990                            let mut map = Object::new();
991                            map.insert(
992                                "other".to_owned(),
993                                Annotated::new(Value::String("value".to_owned())),
994                            );
995                            map
996                        },
997                    },
998                )))]),
999                ..Default::default()
1000            }),
1001            ..Default::default()
1002        });
1003
1004        let mut processor = PiiProcessor::new(config.compiled());
1005        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1006        assert_annotated_snapshot!(event);
1007    }
1008
1009    #[test]
1010    fn test_replace_debugmeta_path() {
1011        let config = serde_json::from_str::<PiiConfig>(
1012            r#"
1013            {
1014                "applications": {
1015                    "debug_meta.images.*.code_file": ["@anything:replace"],
1016                    "debug_meta.images.*.debug_file": ["@anything:replace"]
1017                }
1018            }
1019            "#,
1020        )
1021        .unwrap();
1022
1023        let mut event = Annotated::new(Event {
1024            debug_meta: Annotated::new(DebugMeta {
1025                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1026                    NativeDebugImage {
1027                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1028                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1029                        debug_id: Annotated::new(
1030                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1031                        ),
1032                        debug_file: Annotated::new("wntdll.pdb".into()),
1033                        debug_checksum: Annotated::empty(),
1034                        arch: Annotated::new("arm64".to_owned()),
1035                        image_addr: Annotated::new(Addr(0)),
1036                        image_size: Annotated::new(4096),
1037                        image_vmaddr: Annotated::new(Addr(32768)),
1038                        other: {
1039                            let mut map = Object::new();
1040                            map.insert(
1041                                "other".to_owned(),
1042                                Annotated::new(Value::String("value".to_owned())),
1043                            );
1044                            map
1045                        },
1046                    },
1047                )))]),
1048                ..Default::default()
1049            }),
1050            ..Default::default()
1051        });
1052
1053        let mut processor = PiiProcessor::new(config.compiled());
1054        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1055        assert_annotated_snapshot!(event);
1056    }
1057
1058    #[test]
1059    fn test_hash_debugmeta_path() {
1060        let config = serde_json::from_str::<PiiConfig>(
1061            r#"
1062            {
1063                "applications": {
1064                    "debug_meta.images.*.code_file": ["@anything:hash"],
1065                    "debug_meta.images.*.debug_file": ["@anything:hash"]
1066                }
1067            }
1068            "#,
1069        )
1070        .unwrap();
1071
1072        let mut event = Annotated::new(Event {
1073            debug_meta: Annotated::new(DebugMeta {
1074                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1075                    NativeDebugImage {
1076                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1077                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1078                        debug_id: Annotated::new(
1079                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1080                        ),
1081                        debug_file: Annotated::new("wntdll.pdb".into()),
1082                        debug_checksum: Annotated::empty(),
1083                        arch: Annotated::new("arm64".to_owned()),
1084                        image_addr: Annotated::new(Addr(0)),
1085                        image_size: Annotated::new(4096),
1086                        image_vmaddr: Annotated::new(Addr(32768)),
1087                        other: {
1088                            let mut map = Object::new();
1089                            map.insert(
1090                                "other".to_owned(),
1091                                Annotated::new(Value::String("value".to_owned())),
1092                            );
1093                            map
1094                        },
1095                    },
1096                )))]),
1097                ..Default::default()
1098            }),
1099            ..Default::default()
1100        });
1101
1102        let mut processor = PiiProcessor::new(config.compiled());
1103        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1104        assert_annotated_snapshot!(event);
1105    }
1106
1107    #[test]
1108    fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1109        let config = serde_json::from_str::<PiiConfig>(
1110            r#"
1111            {
1112                "applications": {
1113                    "$string": ["@anything:remove"],
1114                    "**": ["@anything:remove"],
1115                    "debug_meta.**": ["@anything:remove"],
1116                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1117                }
1118            }
1119            "#,
1120        )
1121        .unwrap();
1122
1123        let mut event = Annotated::new(Event {
1124            debug_meta: Annotated::new(DebugMeta {
1125                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1126                    NativeDebugImage {
1127                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1128                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1129                        debug_id: Annotated::new(
1130                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1131                        ),
1132                        debug_file: Annotated::new("wntdll.pdb".into()),
1133                        debug_checksum: Annotated::empty(),
1134                        arch: Annotated::new("arm64".to_owned()),
1135                        image_addr: Annotated::new(Addr(0)),
1136                        image_size: Annotated::new(4096),
1137                        image_vmaddr: Annotated::new(Addr(32768)),
1138                        other: {
1139                            let mut map = Object::new();
1140                            map.insert(
1141                                "other".to_owned(),
1142                                Annotated::new(Value::String("value".to_owned())),
1143                            );
1144                            map
1145                        },
1146                    },
1147                )))]),
1148                ..Default::default()
1149            }),
1150            ..Default::default()
1151        });
1152
1153        let mut processor = PiiProcessor::new(config.compiled());
1154        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1155        assert_annotated_snapshot!(event);
1156    }
1157
1158    #[test]
1159    fn test_quoted_keys() {
1160        let config = serde_json::from_str::<PiiConfig>(
1161            r#"
1162            {
1163                "applications": {
1164                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1165                }
1166            }
1167            "#,
1168        )
1169        .unwrap();
1170
1171        let mut event = Annotated::new(Event {
1172            extra: {
1173                let mut map = Object::new();
1174                map.insert(
1175                    "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1176                    Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1177                );
1178                map.insert(
1179                    "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1180                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1181                );
1182                Annotated::new(map)
1183            },
1184            ..Default::default()
1185        });
1186
1187        let mut processor = PiiProcessor::new(config.compiled());
1188        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1189        assert_annotated_snapshot!(event);
1190    }
1191
1192    #[test]
1193    fn test_logentry_value_types() {
1194        // Assert that logentry.formatted is addressable as $string, $message and $logentry.formatted.
1195        for formatted_selector in &[
1196            "$logentry.formatted",
1197            "$message",
1198            "$logentry.formatted && $message",
1199            "$string",
1200        ] {
1201            let config = serde_json::from_str::<PiiConfig>(&format!(
1202                r##"
1203                {{
1204                    "applications": {{
1205                        "{formatted_selector}": ["@anything:remove"]
1206                    }}
1207                }}
1208                "##
1209            ))
1210            .unwrap();
1211
1212            let mut event = Annotated::new(Event {
1213                logentry: Annotated::new(LogEntry {
1214                    formatted: Annotated::new("Hello world!".to_owned().into()),
1215                    ..Default::default()
1216                }),
1217                ..Default::default()
1218            });
1219
1220            let mut processor = PiiProcessor::new(config.compiled());
1221            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1222            assert!(
1223                event
1224                    .value()
1225                    .unwrap()
1226                    .logentry
1227                    .value()
1228                    .unwrap()
1229                    .formatted
1230                    .value()
1231                    .is_none()
1232            );
1233        }
1234    }
1235
1236    #[test]
1237    fn test_logentry_formatted_never_fully_filtered() {
1238        // Test that logentry.formatted gets smart PII scrubbing via to_pii_config
1239        // and is never completely filtered even with aggressive PII rules
1240        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1241            scrub_data: true,
1242            scrub_defaults: true,
1243            scrub_ip_addresses: true,
1244            ..Default::default()
1245        })
1246        .unwrap()
1247        .unwrap();
1248
1249        let mut event = Annotated::new(Event {
1250            logentry: Annotated::new(LogEntry {
1251                formatted: Annotated::new(
1252                    "User john.doe@company.com failed login with card 4111-1111-1111-1111"
1253                        .to_owned()
1254                        .into(),
1255                ),
1256                ..Default::default()
1257            }),
1258            ..Default::default()
1259        });
1260
1261        let mut processor = PiiProcessor::new(config.compiled());
1262        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1263        assert_annotated_snapshot!(event, @r#"
1264        {
1265          "logentry": {
1266            "formatted": "User [email] failed login with card [creditcard]"
1267          },
1268          "_meta": {
1269            "logentry": {
1270              "formatted": {
1271                "": {
1272                  "rem": [
1273                    [
1274                      "@email:replace",
1275                      "s",
1276                      5,
1277                      12
1278                    ],
1279                    [
1280                      "@creditcard:replace",
1281                      "s",
1282                      36,
1283                      48
1284                    ]
1285                  ],
1286                  "len": 68
1287                }
1288              }
1289            }
1290          }
1291        }
1292        "#);
1293    }
1294
1295    #[test]
1296    fn test_logentry_formatted_bearer_token_scrubbing() {
1297        // Test that bearer tokens are properly scrubbed in logentry.formatted
1298        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1299            scrub_data: true,
1300            scrub_defaults: true,
1301            ..Default::default()
1302        })
1303        .unwrap()
1304        .unwrap();
1305
1306        let mut event = Annotated::new(Event {
1307            logentry: Annotated::new(LogEntry {
1308                formatted: Annotated::new(
1309                    "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1310                        .to_owned()
1311                        .into(),
1312                ),
1313                ..Default::default()
1314            }),
1315            ..Default::default()
1316        });
1317
1318        let mut processor = PiiProcessor::new(config.compiled());
1319        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1320        assert_annotated_snapshot!(event, @r#"
1321        {
1322          "logentry": {
1323            "formatted": "API request failed with Bearer [token] and other data"
1324          },
1325          "_meta": {
1326            "logentry": {
1327              "formatted": {
1328                "": {
1329                  "rem": [
1330                    [
1331                      "@bearer:replace",
1332                      "s",
1333                      24,
1334                      38
1335                    ]
1336                  ],
1337                  "len": 63
1338                }
1339              }
1340            }
1341          }
1342        }
1343        "#);
1344    }
1345
1346    #[test]
1347    fn test_logentry_formatted_password_word_not_scrubbed() {
1348        let config = PiiConfig::default();
1349        let mut event = Annotated::new(Event {
1350            logentry: Annotated::new(LogEntry {
1351                formatted: Annotated::new(
1352                    "User password is secret123 for authentication"
1353                        .to_owned()
1354                        .into(),
1355                ),
1356                ..Default::default()
1357            }),
1358            ..Default::default()
1359        });
1360
1361        let mut processor = PiiProcessor::new(config.compiled());
1362        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1363        assert_annotated_snapshot!(event, @r#"
1364        {
1365          "logentry": {
1366            "formatted": "User password is secret123 for authentication"
1367          }
1368        }
1369        "#);
1370    }
1371
1372    #[test]
1373    fn test_ip_address_hashing() {
1374        let config = serde_json::from_str::<PiiConfig>(
1375            r#"
1376            {
1377                "applications": {
1378                    "$user.ip_address": ["@ip:hash"]
1379                }
1380            }
1381            "#,
1382        )
1383        .unwrap();
1384
1385        let mut event = Annotated::new(Event {
1386            user: Annotated::new(User {
1387                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1388                ..Default::default()
1389            }),
1390            ..Default::default()
1391        });
1392
1393        let mut processor = PiiProcessor::new(config.compiled());
1394        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1395
1396        let user = event.value().unwrap().user.value().unwrap();
1397
1398        assert!(user.ip_address.value().is_none());
1399
1400        assert_eq!(
1401            user.id.value().unwrap().as_str(),
1402            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1403        );
1404    }
1405
1406    #[test]
1407    fn test_ip_address_hashing_does_not_overwrite_id() {
1408        let config = serde_json::from_str::<PiiConfig>(
1409            r#"
1410            {
1411                "applications": {
1412                    "$user.ip_address": ["@ip:hash"]
1413                }
1414            }
1415            "#,
1416        )
1417        .unwrap();
1418
1419        let mut event = Annotated::new(Event {
1420            user: Annotated::new(User {
1421                id: Annotated::new("123".to_owned().into()),
1422                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1423                ..Default::default()
1424            }),
1425            ..Default::default()
1426        });
1427
1428        let mut processor = PiiProcessor::new(config.compiled());
1429        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1430
1431        let user = event.value().unwrap().user.value().unwrap();
1432
1433        // This will get wiped out in renormalization though
1434        assert_eq!(
1435            user.ip_address.value().unwrap().as_str(),
1436            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1437        );
1438
1439        assert_eq!(user.id.value().unwrap().as_str(), "123");
1440    }
1441
1442    #[test]
1443    fn test_replace_replaced_text() {
1444        let chunks = vec![Chunk::Redaction {
1445            text: "[ip]".into(),
1446            rule_id: "@ip".into(),
1447            ty: RemarkType::Substituted,
1448        }];
1449        let rule = RuleRef {
1450            id: "@ip:replace".into(),
1451            origin: "@ip".into(),
1452            ty: RuleType::Ip,
1453            redaction: Redaction::Replace(ReplaceRedaction {
1454                text: "[ip]".into(),
1455            }),
1456        };
1457        let res = apply_regex_to_chunks(
1458            chunks.clone(),
1459            &rule,
1460            &Regex::new(r#".*"#).unwrap(),
1461            ReplaceBehavior::Value,
1462        );
1463        assert_eq!(chunks, res);
1464    }
1465
1466    #[test]
1467    fn test_replace_replaced_text_anything() {
1468        let chunks = vec![Chunk::Redaction {
1469            text: "[Filtered]".into(),
1470            rule_id: "@password:filter".into(),
1471            ty: RemarkType::Substituted,
1472        }];
1473        let rule = RuleRef {
1474            id: "@anything:filter".into(),
1475            origin: "@anything:filter".into(),
1476            ty: RuleType::Anything,
1477            redaction: Redaction::Replace(ReplaceRedaction {
1478                text: "[Filtered]".into(),
1479            }),
1480        };
1481        let res = apply_regex_to_chunks(
1482            chunks.clone(),
1483            &rule,
1484            &Regex::new(r#".*"#).unwrap(),
1485            ReplaceBehavior::Groups(smallvec::smallvec![0]),
1486        );
1487        assert_eq!(chunks, res);
1488    }
1489
1490    #[test]
1491    fn test_trace_route_params_scrubbed() {
1492        let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1493            r#"
1494            {
1495                "type": "trace",
1496                "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1497                "span_id": "fa90fdead5f74052",
1498                "data": {
1499                    "previousRoute": {
1500                        "params": {
1501                            "password": "test"
1502                        }
1503                    }
1504                }
1505            }
1506            "#,
1507        )
1508        .unwrap();
1509
1510        let ds_config = DataScrubbingConfig {
1511            scrub_data: true,
1512            scrub_defaults: true,
1513            ..Default::default()
1514        };
1515        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1516        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1517
1518        process_value(
1519            &mut trace_context,
1520            &mut pii_processor,
1521            ProcessingState::root(),
1522        )
1523        .unwrap();
1524        assert_annotated_snapshot!(trace_context);
1525    }
1526
1527    #[test]
1528    fn test_scrub_span_data_http_not_scrubbed() {
1529        let mut span: Annotated<Span> = Annotated::from_json(
1530            r#"{
1531                "data": {
1532                    "http": {
1533                        "query": "dance=true"
1534                    }
1535                }
1536            }"#,
1537        )
1538        .unwrap();
1539
1540        let ds_config = DataScrubbingConfig {
1541            scrub_data: true,
1542            scrub_defaults: true,
1543            ..Default::default()
1544        };
1545        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1546        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1547
1548        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1549        assert_annotated_snapshot!(span);
1550    }
1551
1552    #[test]
1553    fn test_scrub_span_data_http_strings_are_scrubbed() {
1554        let mut span: Annotated<Span> = Annotated::from_json(
1555            r#"{
1556                "data": {
1557                    "http": {
1558                        "query": "ccnumber=5105105105105100&process_id=123",
1559                        "fragment": "ccnumber=5105105105105100,process_id=123"
1560                    }
1561                }
1562            }"#,
1563        )
1564        .unwrap();
1565
1566        let ds_config = DataScrubbingConfig {
1567            scrub_data: true,
1568            scrub_defaults: true,
1569            ..Default::default()
1570        };
1571        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1572        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1573
1574        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1575        assert_annotated_snapshot!(span);
1576    }
1577
1578    #[test]
1579    fn test_scrub_span_data_http_objects_are_scrubbed() {
1580        let mut span: Annotated<Span> = Annotated::from_json(
1581            r#"{
1582                "data": {
1583                    "http": {
1584                        "query": {
1585                            "ccnumber": "5105105105105100",
1586                            "process_id": "123"
1587                        },
1588                        "fragment": {
1589                            "ccnumber": "5105105105105100",
1590                            "process_id": "123"
1591                        }
1592                    }
1593                }
1594            }"#,
1595        )
1596        .unwrap();
1597
1598        let ds_config = DataScrubbingConfig {
1599            scrub_data: true,
1600            scrub_defaults: true,
1601            ..Default::default()
1602        };
1603        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1604        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1605
1606        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1607        assert_annotated_snapshot!(span);
1608    }
1609
1610    #[test]
1611    fn test_scrub_span_data_untyped_props_are_scrubbed() {
1612        let mut span: Annotated<Span> = Annotated::from_json(
1613            r#"{
1614                "data": {
1615                    "untyped": "ccnumber=5105105105105100",
1616                    "more_untyped": {
1617                        "typed": "no",
1618                        "scrubbed": "yes",
1619                        "ccnumber": "5105105105105100"
1620                    }
1621                }
1622            }"#,
1623        )
1624        .unwrap();
1625
1626        let ds_config = DataScrubbingConfig {
1627            scrub_data: true,
1628            scrub_defaults: true,
1629            ..Default::default()
1630        };
1631        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1632        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1633
1634        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1635        assert_annotated_snapshot!(span);
1636    }
1637
1638    #[test]
1639    fn test_span_data_pii() {
1640        let mut span = Span::from_value(
1641            json!({
1642                "data": {
1643                    "code.filepath": "src/sentry/api/authentication.py",
1644                }
1645            })
1646            .into(),
1647        );
1648
1649        let ds_config = DataScrubbingConfig {
1650            scrub_data: true,
1651            scrub_defaults: true,
1652            ..Default::default()
1653        };
1654        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1655
1656        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1657        processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1658        assert_eq!(
1659            get_value!(span.data.code_filepath!).as_str(),
1660            Some("src/sentry/api/authentication.py")
1661        );
1662    }
1663
1664    #[test]
1665    fn test_csp_source_file_pii() {
1666        let mut event = Event::from_value(
1667            json!({
1668                "csp": {
1669                    "source_file": "authentication.js",
1670                }
1671            })
1672            .into(),
1673        );
1674
1675        let config = serde_json::from_str::<PiiConfig>(
1676            r#"
1677            {
1678                "applications": {
1679                    "csp.source_file": ["@anything:filter"]
1680                }
1681            }
1682            "#,
1683        )
1684        .unwrap();
1685
1686        let mut pii_processor = PiiProcessor::new(config.compiled());
1687        processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1688        assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1689    }
1690
1691    #[test]
1692    fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1693        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1694            r#"{
1695                "data": {
1696                    "http": {
1697                        "query": "dance=true"
1698                    }
1699                }
1700            }"#,
1701        )
1702        .unwrap();
1703
1704        let ds_config = DataScrubbingConfig {
1705            scrub_data: true,
1706            scrub_defaults: true,
1707            ..Default::default()
1708        };
1709        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1710        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1711        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1712        assert_annotated_snapshot!(breadcrumb);
1713    }
1714
1715    #[test]
1716    fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1717        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1718            r#"{
1719                "data": {
1720                    "http": {
1721                        "query": "ccnumber=5105105105105100&process_id=123",
1722                        "fragment": "ccnumber=5105105105105100,process_id=123"
1723                    }
1724                }
1725            }"#,
1726        )
1727        .unwrap();
1728
1729        let ds_config = DataScrubbingConfig {
1730            scrub_data: true,
1731            scrub_defaults: true,
1732            ..Default::default()
1733        };
1734        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1735        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1736        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1737        assert_annotated_snapshot!(breadcrumb);
1738    }
1739
1740    #[test]
1741    fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1742        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1743            r#"{
1744                "data": {
1745                    "http": {
1746                        "query": {
1747                            "ccnumber": "5105105105105100",
1748                            "process_id": "123"
1749                        },
1750                        "fragment": {
1751                            "ccnumber": "5105105105105100",
1752                            "process_id": "123"
1753                        }
1754                    }
1755                }
1756            }"#,
1757        )
1758        .unwrap();
1759
1760        let ds_config = DataScrubbingConfig {
1761            scrub_data: true,
1762            scrub_defaults: true,
1763            ..Default::default()
1764        };
1765        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1766        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1767
1768        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1769        assert_annotated_snapshot!(breadcrumb);
1770    }
1771
1772    #[test]
1773    fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1774        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1775            r#"{
1776                "data": {
1777                    "untyped": "ccnumber=5105105105105100",
1778                    "more_untyped": {
1779                        "typed": "no",
1780                        "scrubbed": "yes",
1781                        "ccnumber": "5105105105105100"
1782                    }
1783                }
1784            }"#,
1785        )
1786        .unwrap();
1787
1788        let ds_config = DataScrubbingConfig {
1789            scrub_data: true,
1790            scrub_defaults: true,
1791            ..Default::default()
1792        };
1793        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1794        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1795        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1796        assert_annotated_snapshot!(breadcrumb);
1797    }
1798
1799    #[test]
1800    fn test_scrub_graphql_response_data_with_variables() {
1801        let mut data = Event::from_value(
1802            json!({
1803              "request": {
1804                "data": {
1805                  "query": "{\n  viewer {\n    login\n  }\n}",
1806                  "variables": {
1807                    "login": "foo"
1808                  }
1809                },
1810                "api_target": "graphql"
1811              },
1812              "contexts": {
1813                "response": {
1814                  "type": "response",
1815                  "data": {
1816                    "data": {
1817                      "viewer": {
1818                        "login": "foo"
1819                      }
1820                    }
1821                  }
1822                }
1823              }
1824            })
1825            .into(),
1826        );
1827
1828        scrub_graphql(data.value_mut().as_mut().unwrap());
1829
1830        assert_debug_snapshot!(&data);
1831    }
1832
1833    #[test]
1834    fn test_scrub_graphql_response_data_without_variables() {
1835        let mut data = Event::from_value(
1836            json!({
1837              "request": {
1838                "data": {
1839                  "query": "{\n  viewer {\n    login\n  }\n}"
1840                },
1841                "api_target": "graphql"
1842              },
1843              "contexts": {
1844                "response": {
1845                  "type": "response",
1846                  "data": {
1847                    "data": {
1848                      "viewer": {
1849                        "login": "foo"
1850                      }
1851                    }
1852                  }
1853                }
1854              }
1855            })
1856            .into(),
1857        );
1858
1859        scrub_graphql(data.value_mut().as_mut().unwrap());
1860        assert_debug_snapshot!(&data);
1861    }
1862
1863    #[test]
1864    fn test_does_not_scrub_if_no_graphql() {
1865        let mut data = Event::from_value(
1866            json!({
1867              "request": {
1868                "data": {
1869                  "query": "{\n  viewer {\n    login\n  }\n}",
1870                  "variables": {
1871                    "login": "foo"
1872                  }
1873                },
1874              },
1875              "contexts": {
1876                "response": {
1877                  "type": "response",
1878                  "data": {
1879                    "data": {
1880                      "viewer": {
1881                        "login": "foo"
1882                      }
1883                    }
1884                  }
1885                }
1886              }
1887            })
1888            .into(),
1889        );
1890
1891        let scrubbing_config = DataScrubbingConfig {
1892            scrub_data: true,
1893            scrub_ip_addresses: true,
1894            scrub_defaults: true,
1895            ..Default::default()
1896        };
1897
1898        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1899        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1900
1901        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1902
1903        assert_debug_snapshot!(&data);
1904    }
1905
1906    #[test]
1907    fn test_logentry_params_scrubbed() {
1908        let config = serde_json::from_str::<PiiConfig>(
1909            r##"
1910                {
1911                    "applications": {
1912                        "$string": ["@anything:remove"]
1913                    }
1914                }
1915                "##,
1916        )
1917        .unwrap();
1918
1919        let mut event = Annotated::new(Event {
1920            logentry: Annotated::new(LogEntry {
1921                message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
1922                formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
1923                params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
1924                    "12345".to_owned(),
1925                ))])),
1926                ..Default::default()
1927            }),
1928            ..Default::default()
1929        });
1930
1931        let mut processor = PiiProcessor::new(config.compiled());
1932        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1933
1934        let params = get_value!(event.logentry.params!);
1935        assert_debug_snapshot!(params, @r###"
1936        Array(
1937            [
1938                Meta {
1939                    remarks: [
1940                        Remark {
1941                            ty: Removed,
1942                            rule_id: "@anything:remove",
1943                            range: None,
1944                        },
1945                    ],
1946                    errors: [],
1947                    original_length: None,
1948                    original_value: None,
1949                },
1950            ],
1951        )
1952        "###);
1953    }
1954
1955    #[test]
1956    fn test_is_pairlist() {
1957        for (case, expected) in [
1958            (r#"[]"#, false),
1959            (r#"["foo"]"#, false),
1960            (r#"["foo", 123]"#, false),
1961            (r#"[[1, "foo"]]"#, false),
1962            (r#"[[["too_nested", 123]]]"#, false),
1963            (r#"[["foo", "bar"], [1, "foo"]]"#, false),
1964            (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
1965            (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
1966            (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
1967            (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
1968            (r#"[["foo", 123]]"#, true),
1969            (r#"[["foo", "bar"]]"#, true),
1970            (
1971                r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
1972                true,
1973            ),
1974        ] {
1975            let v = Annotated::<Value>::from_json(case).unwrap();
1976            let Annotated(Some(Value::Array(mut a)), _) = v else {
1977                panic!()
1978            };
1979            assert_eq!(is_pairlist(&mut a), expected, "{case}");
1980        }
1981    }
1982
1983    #[test]
1984    fn test_tuple_array_scrubbed_with_path_selector() {
1985        // We expect that both of these configs express the same semantics.
1986        let configs = vec![
1987            // This configuration matches on the authorization element (the 1st element of the array
1988            // represents the key).
1989            r##"
1990                {
1991                    "applications": {
1992                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
1993                    }
1994                }
1995                "##,
1996            // This configuration matches on the 2nd element of the array.
1997            r##"
1998                {
1999                    "applications": {
2000                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
2001                    }
2002                }
2003                "##,
2004        ];
2005
2006        let mut event = Event::from_value(
2007            serde_json::json!(
2008            {
2009              "message": "hi",
2010              "exception": {
2011                "values": [
2012                  {
2013                    "type": "BrokenException",
2014                    "value": "Something failed",
2015                    "stacktrace": {
2016                      "frames": [
2017                        {
2018                            "vars": {
2019                                "headers": [
2020                                    ["authorization", "Bearer abc123"]
2021                                ]
2022                            }
2023                        }
2024                      ]
2025                    }
2026                  }
2027                ]
2028              }
2029            })
2030            .into(),
2031        );
2032
2033        for config in configs {
2034            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
2035            let mut processor = PiiProcessor::new(config.compiled());
2036            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2037
2038            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2039
2040            allow_duplicates!(assert_debug_snapshot!(vars, @r###"
2041                              FrameVars(
2042                                  {
2043                                      "headers": Array(
2044                                          [
2045                                              Array(
2046                                                  [
2047                                                      String(
2048                                                          "authorization",
2049                                                      ),
2050                                                      Annotated(
2051                                                          String(
2052                                                              "[Filtered]",
2053                                                          ),
2054                                                          Meta {
2055                                                              remarks: [
2056                                                                  Remark {
2057                                                                      ty: Substituted,
2058                                                                      rule_id: "@anything:replace",
2059                                                                      range: Some(
2060                                                                          (
2061                                                                              0,
2062                                                                              10,
2063                                                                          ),
2064                                                                      ),
2065                                                                  },
2066                                                              ],
2067                                                              errors: [],
2068                                                              original_length: Some(
2069                                                                  13,
2070                                                              ),
2071                                                              original_value: None,
2072                                                          },
2073                                                      ),
2074                                                  ],
2075                                              ),
2076                                          ],
2077                                      ),
2078                                  },
2079                              )
2080                              "###));
2081        }
2082    }
2083
2084    #[test]
2085    fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2086        let config = serde_json::from_str::<PiiConfig>(
2087            r##"
2088                {
2089                    "applications": {
2090                        "$string": ["@password:remove"]
2091                    }
2092                }
2093                "##,
2094        )
2095        .unwrap();
2096
2097        let mut event = Event::from_value(
2098            serde_json::json!(
2099            {
2100              "message": "hi",
2101              "exception": {
2102                "values": [
2103                  {
2104                    "type": "BrokenException",
2105                    "value": "Something failed",
2106                    "stacktrace": {
2107                      "frames": [
2108                        {
2109                            "vars": {
2110                                "headers": [
2111                                    ["authorization", "abc123"]
2112                                ]
2113                            }
2114                        }
2115                      ]
2116                    }
2117                  }
2118                ]
2119              }
2120            })
2121            .into(),
2122        );
2123
2124        let mut processor = PiiProcessor::new(config.compiled());
2125        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2126
2127        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2128
2129        assert_debug_snapshot!(vars, @r###"
2130        FrameVars(
2131            {
2132                "headers": Array(
2133                    [
2134                        Array(
2135                            [
2136                                String(
2137                                    "authorization",
2138                                ),
2139                                Meta {
2140                                    remarks: [
2141                                        Remark {
2142                                            ty: Removed,
2143                                            rule_id: "@password:remove",
2144                                            range: None,
2145                                        },
2146                                    ],
2147                                    errors: [],
2148                                    original_length: None,
2149                                    original_value: None,
2150                                },
2151                            ],
2152                        ),
2153                    ],
2154                ),
2155            },
2156        )
2157        "###);
2158    }
2159}