relay_pii/
processor.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8    self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9    ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12    AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Selects how scrubbing rules are applied to attributes.
///
/// The mode is `Copy` and comparable, so callers can branch on it with `==` as well as with
/// pattern matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttributeMode {
    /// Treat the attribute as an object, which allows rules to refer
    /// to its individual fields.
    Object,
    /// Identify the attribute with its value and apply all
    /// rules to that value directly.
    ValueOnly,
}
32
33/// A processor that performs PII stripping.
34pub struct PiiProcessor<'a> {
35    /// Controls how rules are applied to attributes.
36    attribute_mode: AttributeMode,
37    compiled_config: &'a CompiledPiiConfig,
38}
39
40impl<'a> PiiProcessor<'a> {
41    /// Creates a new processor based on a config.
42    pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43        // this constructor needs to be cheap... a new PiiProcessor is created for each event. Move
44        // any init logic into CompiledPiiConfig::new.
45        PiiProcessor {
46            compiled_config,
47            attribute_mode: AttributeMode::Object,
48        }
49    }
50
51    /// Sets an `AttributeMode` on this processor.
52    pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53        self.attribute_mode = attribute_mode;
54        self
55    }
56
57    fn apply_all_rules(
58        &self,
59        meta: &mut Meta,
60        state: &ProcessingState<'_>,
61        mut value: Option<&mut String>,
62    ) -> ProcessingResult {
63        let pii = state.pii();
64        if pii == Pii::False {
65            return Ok(());
66        }
67
68        for (selector, rules) in self.compiled_config.applications.iter() {
69            if selector.matches_path(&state.path()) {
70                #[allow(clippy::needless_option_as_deref)]
71                for rule in rules {
72                    let reborrowed_value = value.as_deref_mut();
73                    apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74                }
75            }
76        }
77
78        Ok(())
79    }
80}
81
82impl Processor for PiiProcessor<'_> {
83    fn before_process<T: ProcessValue>(
84        &mut self,
85        value: Option<&T>,
86        meta: &mut Meta,
87        state: &ProcessingState<'_>,
88    ) -> ProcessingResult {
89        if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90            // Also apply pii scrubbing to the original value (set by normalization or other processors),
91            // such that we do not leak sensitive data through meta. Deletes `original_value` if an Error
92            // value is returned.
93            if let Some(parent) = state.iter().next() {
94                let path = state.path();
95                let new_state = parent.enter_borrowed(
96                    path.key().unwrap_or(""),
97                    Some(Cow::Borrowed(state.attrs())),
98                    enum_set!(ValueType::String),
99                );
100
101                if self
102                    .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103                    .is_err()
104                {
105                    // `apply_all_rules` returned `DeleteValueHard` or `DeleteValueSoft`, so delete the original as well.
106                    meta.set_original_value(Option::<String>::None);
107                }
108            }
109        }
110
111        // booleans cannot be PII, and strings are handled in process_string
112        if state.value_type().contains(ValueType::Boolean)
113            || state.value_type().contains(ValueType::String)
114        {
115            return Ok(());
116        }
117
118        if value.is_none() {
119            return Ok(());
120        }
121
122        // apply rules based on key/path
123        self.apply_all_rules(meta, state, None)
124    }
125
126    fn process_array<T>(
127        &mut self,
128        array: &mut Array<T>,
129        _meta: &mut Meta,
130        state: &ProcessingState<'_>,
131    ) -> ProcessingResult
132    where
133        T: ProcessValue,
134    {
135        if is_pairlist(array) {
136            for annotated in array {
137                let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139                if let Some(Value::Array(pair)) = mapped.value_mut() {
140                    let mut value = mem::take(&mut pair[1]);
141                    let value_type = ValueType::for_field(&value);
142
143                    if let Some(key_name) = &pair[0].as_str() {
144                        // We enter the key of the first element of the array, since we treat it
145                        // as a pair.
146                        let key_state =
147                            state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148                        // We process the value with a state that "simulates" the first value of the
149                        // array as if it was the key of a dictionary.
150                        process_value(&mut value, self, &key_state)?;
151                    }
152
153                    // Put value back into pair.
154                    pair[1] = value;
155                }
156
157                // Put pair back into array.
158                *annotated = T::from_value(mapped);
159            }
160
161            Ok(())
162        } else {
163            // If we didn't find a pairlist, we can process child values as normal.
164            array.process_child_values(self, state)
165        }
166    }
167
168    fn process_string(
169        &mut self,
170        value: &mut String,
171        meta: &mut Meta,
172        state: &ProcessingState<'_>,
173    ) -> ProcessingResult {
174        if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175            return Ok(());
176        }
177
178        // same as before_process. duplicated here because we can only check for "true",
179        // "false" etc in process_string.
180        self.apply_all_rules(meta, state, Some(value))
181    }
182
183    fn process_native_image_path(
184        &mut self,
185        NativeImagePath(value): &mut NativeImagePath,
186        meta: &mut Meta,
187        state: &ProcessingState<'_>,
188    ) -> ProcessingResult {
189        // In NativeImagePath we must not strip the file's basename because that would break
190        // processing.
191        //
192        // We pop the basename from the end of the string, call process_string and push the
193        // basename again.
194        //
195        // The ranges in Meta should still be right as long as we only pop/push from the end of the
196        // string. If we decide that we need to preserve anything other than suffixes all PII
197        // tooltips/annotations are potentially wrong.
198
199        if let Some(index) = value.rfind(['/', '\\']) {
200            let basename = value.split_off(index);
201            match self.process_string(value, meta, state) {
202                Ok(()) => value.push_str(&basename),
203                Err(ProcessingAction::DeleteValueHard) | Err(ProcessingAction::DeleteValueSoft) => {
204                    basename[1..].clone_into(value);
205                }
206                Err(ProcessingAction::InvalidTransaction(x)) => {
207                    return Err(ProcessingAction::InvalidTransaction(x));
208                }
209            }
210        }
211
212        Ok(())
213    }
214
215    fn process_pairlist<T: ProcessValue + AsPair>(
216        &mut self,
217        value: &mut PairList<T>,
218        _meta: &mut Meta,
219        state: &ProcessingState,
220    ) -> ProcessingResult {
221        utils::process_pairlist(self, value, state)
222    }
223
224    fn process_attributes(
225        &mut self,
226        value: &mut relay_event_schema::protocol::Attributes,
227        _meta: &mut Meta,
228        state: &ProcessingState,
229    ) -> ProcessingResult {
230        match self.attribute_mode {
231            // Treat each attribute as an object and just process them field by field.
232            AttributeMode::Object => value.process_child_values(self, state),
233            // Identify each attribute with its `value` and only process that.
234            AttributeMode::ValueOnly => {
235                for (key, attribute) in value.0.iter_mut() {
236                    let Some(attribute) = attribute.value_mut() else {
237                        continue;
238                    };
239
240                    // We need some manual state management here because we're bypassing all the
241                    // intermediate structures and pointing at the value directly. This essentially
242                    // mimics the attributes and value type that the metastructure derivation would
243                    // produce for the attribute vaue.
244                    let attrs = FieldAttrs::new()
245                        .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
246                    let inner_value = &mut attribute.value.value;
247                    let inner_value_type = ValueType::for_field(inner_value);
248                    let entered =
249                        state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
250
251                    processor::process_value(inner_value, self, &entered)?;
252                    self.process_other(&mut attribute.other, state)?;
253                }
254                Ok(())
255            }
256        }
257    }
258
259    fn process_user(
260        &mut self,
261        user: &mut User,
262        _meta: &mut Meta,
263        state: &ProcessingState<'_>,
264    ) -> ProcessingResult {
265        let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
266
267        // Recurse into the user and does PII processing on fields.
268        user.process_child_values(self, state)?;
269
270        let has_other_fields = user.id.value().is_some()
271            || user.username.value().is_some()
272            || user.email.value().is_some();
273
274        let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
275
276        // If the IP address has become invalid as part of PII processing, we move it into the user
277        // ID. That ensures people can do IP hashing and still have a correct users-affected count.
278        //
279        // Right now both Snuba and EventUser discard unparseable IPs for indexing, and we assume
280        // we want to keep it that way.
281        //
282        // If there are any other fields set that take priority over the IP for uniquely
283        // identifying a user (has_other_fields), we do not want to do anything. The value will be
284        // wiped out in renormalization anyway.
285        if ip_was_valid && !has_other_fields && !ip_is_still_valid {
286            user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
287            user.ip_address.meta_mut().add_remark(Remark::new(
288                RemarkType::Removed,
289                "pii:ip_address".to_owned(),
290            ));
291        }
292
293        Ok(())
294    }
295
296    // Replay PII processor entry point.
297    fn process_replay(
298        &mut self,
299        replay: &mut Replay,
300        _meta: &mut Meta,
301        state: &ProcessingState<'_>,
302    ) -> ProcessingResult {
303        replay.process_child_values(self, state)?;
304        Ok(())
305    }
306}
307
/// Visitor that records whether a value looks like a `[key, value]` pair.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited array is at depth 0 and has exactly two elements.
    is_pair: bool,
    /// Set when the first element of the visited array is a string.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Returns true if the processor identified the supplied data as an array composed of
    /// a key (string) and a value.
    fn is_pair_array(&self) -> bool {
        matches!((self.is_pair, self.has_string_key), (true, true))
    }
}
321
322impl Processor for PairListProcessor {
323    fn process_array<T>(
324        &mut self,
325        value: &mut Array<T>,
326        _meta: &mut Meta,
327        state: &ProcessingState<'_>,
328    ) -> ProcessingResult
329    where
330        T: ProcessValue,
331    {
332        self.is_pair = state.depth() == 0 && value.len() == 2;
333        if self.is_pair {
334            let key_type = ValueType::for_field(&value[0]);
335            process_value(
336                &mut value[0],
337                self,
338                &state.enter_index(0, state.inner_attrs(), key_type),
339            )?;
340        }
341
342        Ok(())
343    }
344
345    fn process_string(
346        &mut self,
347        _value: &mut String,
348        _meta: &mut Meta,
349        state: &ProcessingState<'_>,
350    ) -> ProcessingResult where {
351        if state.depth() == 1 && state.path().index() == Some(0) {
352            self.has_string_key = true;
353        }
354
355        Ok(())
356    }
357}
358
359fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
360    for element in array.iter_mut() {
361        let mut visitor = PairListProcessor::default();
362        process_value(element, &mut visitor, ProcessingState::root()).ok();
363        if !visitor.is_pair_array() {
364            return false;
365        }
366    }
367
368    !array.is_empty()
369}
370
371/// Scrubs GraphQL variables from the event.
372pub fn scrub_graphql(event: &mut Event) {
373    let mut keys: BTreeSet<&str> = BTreeSet::new();
374
375    let mut is_graphql = false;
376
377    // Collect the variables keys and scrub them out.
378    if let Some(request) = event.request.value_mut()
379        && let Some(Value::Object(data)) = request.data.value_mut()
380    {
381        if let Some(api_target) = request.api_target.value()
382            && api_target.eq_ignore_ascii_case("graphql")
383        {
384            is_graphql = true;
385        }
386
387        if is_graphql
388            && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
389        {
390            for (key, value) in variables.iter_mut() {
391                keys.insert(key);
392                value.set_value(Some(Value::String("[Filtered]".to_owned())));
393            }
394        }
395    }
396
397    if !is_graphql {
398        return;
399    }
400
401    // Scrub PII from the data object if they match the variables keys.
402    if let Some(contexts) = event.contexts.value_mut()
403        && let Some(response) = contexts.get_mut::<ResponseContext>()
404        && let Some(Value::Object(data)) = response.data.value_mut()
405        && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
406    {
407        if !keys.is_empty() {
408            scrub_graphql_data(&keys, graphql_data);
409        } else {
410            // If we don't have the variable keys, we scrub the whole data object
411            // because the query or mutation weren't parameterized.
412            data.remove("data");
413        }
414    }
415}
416
417/// Scrubs values from the data object to `[Filtered]`.
418fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
419    for (key, value) in data.iter_mut() {
420        match value.value_mut() {
421            Some(Value::Object(item_data)) => {
422                scrub_graphql_data(keys, item_data);
423            }
424            _ => {
425                if keys.contains(key.as_str()) {
426                    value.set_value(Some(Value::String("[Filtered]".to_owned())));
427                }
428            }
429        }
430    }
431}
432
433fn apply_rule_to_value(
434    meta: &mut Meta,
435    rule: &RuleRef,
436    key: Option<&str>,
437    mut value: Option<&mut String>,
438) -> ProcessingResult {
439    // The rule might specify to remove or to redact. If redaction is chosen, we need to
440    // chunk up the value, otherwise we need to simply mark the value for deletion.
441    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
442
443    // In case the value is not a string (but a container, bool or number) and the rule matches on
444    // anything, we can only remove the value (not replace, hash, etc).
445    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
446        // The value is a container, @anything on a container can do nothing but delete.
447        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
448        return Err(ProcessingAction::DeleteValueHard);
449    }
450
451    macro_rules! apply_regex {
452        ($regex:expr, $replace_behavior:expr) => {
453            if let Some(ref mut value) = value {
454                processor::process_chunked_value(value, meta, |chunks| {
455                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
456                });
457            }
458        };
459    }
460
461    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
462        match pattern_type {
463            PatternType::KeyValue => {
464                if regex.is_match(key.unwrap_or("")) {
465                    if value.is_some() && should_redact_chunks {
466                        // If we're given a string value here, redact the value like we would with
467                        // @anything.
468                        apply_regex!(&ANYTHING_REGEX, replace_behavior);
469                    } else {
470                        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
471                        return Err(ProcessingAction::DeleteValueHard);
472                    }
473                } else {
474                    // If we did not redact using the key, we will redact the entire value if the key
475                    // appears in it.
476                    apply_regex!(regex, replace_behavior);
477                }
478            }
479            PatternType::Value => {
480                apply_regex!(regex, replace_behavior);
481            }
482        }
483    }
484
485    Ok(())
486}
487
488fn apply_regex_to_chunks<'a>(
489    chunks: Vec<Chunk<'a>>,
490    rule: &RuleRef,
491    regex: &Regex,
492    replace_behavior: ReplaceBehavior,
493) -> Vec<Chunk<'a>> {
494    // NB: This function allocates the entire string and all chunks a second time. This means it
495    // cannot reuse chunks and reallocates them. Ideally, we would be able to run the regex directly
496    // on the chunks, but the `regex` crate does not support that.
497
498    let mut search_string = String::new();
499    let mut has_text = false;
500    for chunk in &chunks {
501        match chunk {
502            Chunk::Text { text } => {
503                has_text = true;
504                search_string.push_str(&text.replace('\x00', ""));
505            }
506            Chunk::Redaction { .. } => search_string.push('\x00'),
507        }
508    }
509
510    if !has_text {
511        // Nothing to replace.
512        return chunks;
513    }
514
515    // Early exit if this regex does not match and return the original chunks.
516    let mut captures_iter = regex.captures_iter(&search_string).peekable();
517    if captures_iter.peek().is_none() {
518        return chunks;
519    }
520
521    let mut replacement_chunks = vec![];
522    for chunk in chunks {
523        if let Chunk::Redaction { .. } = chunk {
524            replacement_chunks.push(chunk);
525        }
526    }
527    replacement_chunks.reverse();
528
529    fn process_text<'a>(
530        text: &str,
531        rv: &mut Vec<Chunk<'a>>,
532        replacement_chunks: &mut Vec<Chunk<'a>>,
533    ) {
534        if text.is_empty() {
535            return;
536        }
537
538        static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
539        let regex = NULL_SPLIT_RE.get_or_init(|| {
540            #[allow(clippy::trivial_regex)]
541            Regex::new("\x00").unwrap()
542        });
543
544        let mut pos = 0;
545        for piece in regex.find_iter(text) {
546            rv.push(Chunk::Text {
547                text: Cow::Owned(text[pos..piece.start()].to_string()),
548            });
549            rv.push(replacement_chunks.pop().unwrap());
550            pos = piece.end();
551        }
552
553        rv.push(Chunk::Text {
554            text: Cow::Owned(text[pos..].to_string()),
555        });
556    }
557
558    let mut pos = 0;
559    let mut rv = Vec::with_capacity(replacement_chunks.len());
560
561    match replace_behavior {
562        ReplaceBehavior::Groups(ref groups) => {
563            for m in captures_iter {
564                for (idx, g) in m.iter().enumerate() {
565                    if let Some(g) = g
566                        && groups.contains(&(idx as u8))
567                    {
568                        process_text(
569                            &search_string[pos..g.start()],
570                            &mut rv,
571                            &mut replacement_chunks,
572                        );
573                        insert_replacement_chunks(rule, g.as_str(), &mut rv);
574                        pos = g.end();
575                    }
576                }
577            }
578            process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
579            debug_assert!(replacement_chunks.is_empty());
580        }
581        ReplaceBehavior::Value => {
582            // We only want to replace a string value, and the replacement chunk for that is
583            // inserted by insert_replacement_chunks. Adding chunks from replacement_chunks
584            // results in the incorrect behavior of a total of more chunks than the input.
585            insert_replacement_chunks(rule, &search_string, &mut rv);
586        }
587    }
588    rv
589}
590
591fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
592    match &rule.redaction {
593        Redaction::Default | Redaction::Remove => {
594            output.push(Chunk::Redaction {
595                text: Cow::Borrowed(""),
596                rule_id: Cow::Owned(rule.origin.to_string()),
597                ty: RemarkType::Removed,
598            });
599        }
600        Redaction::Mask => {
601            let buf = vec!['*'; text.chars().count()];
602
603            output.push(Chunk::Redaction {
604                ty: RemarkType::Masked,
605                rule_id: Cow::Owned(rule.origin.to_string()),
606                text: buf.into_iter().collect(),
607            })
608        }
609        Redaction::Hash => {
610            output.push(Chunk::Redaction {
611                ty: RemarkType::Pseudonymized,
612                rule_id: Cow::Owned(rule.origin.to_string()),
613                text: Cow::Owned(utils::hash_value(text.as_bytes())),
614            });
615        }
616        Redaction::Replace(replace) => {
617            output.push(Chunk::Redaction {
618                ty: RemarkType::Substituted,
619                rule_id: Cow::Owned(rule.origin.to_string()),
620                text: Cow::Owned(replace.text.clone()),
621            });
622        }
623        Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
624    }
625}
626
627#[cfg(test)]
628mod tests {
629    use insta::{allow_duplicates, assert_debug_snapshot};
630    use relay_event_schema::processor::process_value;
631    use relay_event_schema::protocol::{
632        Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
633        NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
634    };
635    use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
636    use serde_json::json;
637
638    use super::*;
639    use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
640
641    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
642        use crate::convert::to_pii_config as to_pii_config_impl;
643        let rv = to_pii_config_impl(datascrubbing_config).unwrap();
644        if let Some(ref config) = rv {
645            let roundtrip: PiiConfig =
646                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
647            assert_eq!(&roundtrip, config);
648        }
649        rv
650    }
651
652    #[test]
653    fn test_scrub_original_value() {
654        let mut data = Event::from_value(
655            json!({
656                "user": {
657                    "username": "hey  man 73.133.27.120", // should be stripped despite not being "known ip field"
658                    "ip_address": "is this an ip address? 73.133.27.120", //  <--------
659                },
660                "hpkp":"invalid data my ip address is  74.133.27.120 and my credit card number is  4571234567890111 ",
661            })
662            .into(),
663        );
664
665        let scrubbing_config = DataScrubbingConfig {
666            scrub_data: true,
667            scrub_ip_addresses: true,
668            scrub_defaults: true,
669            ..Default::default()
670        };
671
672        let pii_config = to_pii_config(&scrubbing_config).unwrap();
673        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
674
675        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
676
677        assert_debug_snapshot!(&data);
678    }
679
680    #[test]
681    fn test_sentry_user() {
682        let mut data = Event::from_value(
683            json!({
684                "user": {
685                    "ip_address": "73.133.27.120",
686                    "sentry_user": "ip:73.133.27.120",
687                },
688            })
689            .into(),
690        );
691
692        let scrubbing_config = DataScrubbingConfig {
693            scrub_data: true,
694            scrub_ip_addresses: true,
695            scrub_defaults: true,
696            ..Default::default()
697        };
698
699        let pii_config = to_pii_config(&scrubbing_config).unwrap();
700        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
701
702        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
703
704        assert_debug_snapshot!(&data);
705    }
706
707    #[test]
708    fn test_basic_stripping() {
709        let config = serde_json::from_str::<PiiConfig>(
710            r#"
711            {
712                "rules": {
713                    "remove_bad_headers": {
714                        "type": "redact_pair",
715                        "keyPattern": "(?i)cookie|secret[-_]?key"
716                    }
717                },
718                "applications": {
719                    "$string": ["@ip"],
720                    "$object.**": ["remove_bad_headers"]
721                }
722            }
723            "#,
724        )
725        .unwrap();
726
727        let mut event = Annotated::new(Event {
728            logentry: Annotated::new(LogEntry {
729                formatted: Annotated::new("Hello world!".to_owned().into()),
730                ..Default::default()
731            }),
732            request: Annotated::new(Request {
733                env: {
734                    let mut rv = Object::new();
735                    rv.insert(
736                        "SECRET_KEY".to_owned(),
737                        Annotated::new(Value::String("134141231231231231231312".into())),
738                    );
739                    Annotated::new(rv)
740                },
741                headers: {
742                    let rv = vec![
743                        Annotated::new((
744                            Annotated::new("Cookie".to_owned().into()),
745                            Annotated::new("super secret".to_owned().into()),
746                        )),
747                        Annotated::new((
748                            Annotated::new("X-Forwarded-For".to_owned().into()),
749                            Annotated::new("127.0.0.1".to_owned().into()),
750                        )),
751                    ];
752                    Annotated::new(Headers(PairList(rv)))
753                },
754                ..Default::default()
755            }),
756            tags: Annotated::new(Tags(
757                vec![Annotated::new(TagEntry(
758                    Annotated::new("forwarded_for".to_owned()),
759                    Annotated::new("127.0.0.1".to_owned()),
760                ))]
761                .into(),
762            )),
763            ..Default::default()
764        });
765
766        let mut processor = PiiProcessor::new(config.compiled());
767        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
768        assert_annotated_snapshot!(event);
769    }
770
771    #[test]
772    fn test_redact_containers() {
773        let config = serde_json::from_str::<PiiConfig>(
774            r#"
775            {
776                "applications": {
777                    "$object": ["@anything"]
778                }
779            }
780            "#,
781        )
782        .unwrap();
783
784        let mut event = Annotated::new(Event {
785            extra: {
786                let mut map = Object::new();
787                map.insert(
788                    "foo".to_owned(),
789                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
790                );
791                Annotated::new(map)
792            },
793            ..Default::default()
794        });
795
796        let mut processor = PiiProcessor::new(config.compiled());
797        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
798        assert_annotated_snapshot!(event);
799    }
800
801    #[test]
802    fn test_redact_custom_pattern() {
803        let config = serde_json::from_str::<PiiConfig>(
804            r#"
805            {
806                "applications": {
807                    "$string": ["myrule"]
808                },
809                "rules": {
810                    "myrule": {
811                        "type": "pattern",
812                        "pattern": "foo",
813                        "redaction": {
814                            "method": "replace",
815                            "text": "asd"
816                        }
817                    }
818                }
819            }
820            "#,
821        )
822        .unwrap();
823
824        let mut event = Annotated::new(Event {
825            extra: {
826                let mut map = Object::new();
827                map.insert(
828                    "myvalue".to_owned(),
829                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
830                );
831                Annotated::new(map)
832            },
833            ..Default::default()
834        });
835
836        let mut processor = PiiProcessor::new(config.compiled());
837        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
838        assert_annotated_snapshot!(event);
839    }
840
841    #[test]
842    fn test_redact_custom_negative_pattern() {
843        let config = serde_json::from_str::<PiiConfig>(
844            r#"
845            {
846                "applications": {
847                    "$string": ["myrule"]
848                },
849                "rules": {
850                    "myrule": {
851                        "type": "pattern",
852                        "pattern": "the good string|.*OK.*|(.*)",
853                        "replaceGroups": [1],
854                        "redaction": {
855                            "method": "mask"
856                        }
857                    }
858                }
859            }
860            "#,
861        )
862        .unwrap();
863
864        let mut event = Annotated::<Event>::from_json(
865            r#"{
866            "extra": {
867                "1": "the good string",
868                "2": "a bad string",
869                "3": "another OK string",
870                "4": "another bad one"
871            }
872        }"#,
873        )
874        .unwrap();
875
876        let mut processor = PiiProcessor::new(config.compiled());
877        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
878        assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
879        {
880          "1": "the good string",
881          "2": "************",
882          "3": "another OK string",
883          "4": "***************",
884          "_meta": {
885            "2": {
886              "": {
887                "rem": [
888                  [
889                    "myrule",
890                    "m",
891                    0,
892                    12
893                  ]
894                ],
895                "len": 12
896              }
897            },
898            "4": {
899              "": {
900                "rem": [
901                  [
902                    "myrule",
903                    "m",
904                    0,
905                    15
906                  ]
907                ],
908                "len": 15
909              }
910            }
911          }
912        }
913        "#);
914    }
915
916    #[test]
917    fn test_no_field_upsert() {
918        let config = serde_json::from_str::<PiiConfig>(
919            r#"
920            {
921                "applications": {
922                    "**": ["@anything:remove"]
923                }
924            }
925            "#,
926        )
927        .unwrap();
928
929        let mut event = Annotated::new(Event {
930            extra: {
931                let mut map = Object::new();
932                map.insert(
933                    "myvalue".to_owned(),
934                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
935                );
936                Annotated::new(map)
937            },
938            ..Default::default()
939        });
940
941        let mut processor = PiiProcessor::new(config.compiled());
942        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
943        assert_annotated_snapshot!(event);
944    }
945
946    #[test]
947    fn test_anything_hash_on_string() {
948        let config = serde_json::from_str::<PiiConfig>(
949            r#"
950            {
951                "applications": {
952                    "$string": ["@anything:hash"]
953                }
954            }
955            "#,
956        )
957        .unwrap();
958
959        let mut event = Annotated::new(Event {
960            extra: {
961                let mut map = Object::new();
962                map.insert(
963                    "myvalue".to_owned(),
964                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
965                );
966                Annotated::new(map)
967            },
968            ..Default::default()
969        });
970
971        let mut processor = PiiProcessor::new(config.compiled());
972        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
973        assert_annotated_snapshot!(event);
974    }
975
976    #[test]
977    fn test_anything_hash_on_container() {
978        let config = serde_json::from_str::<PiiConfig>(
979            r#"
980            {
981                "applications": {
982                    "$object": ["@anything:hash"]
983                }
984            }
985            "#,
986        )
987        .unwrap();
988
989        let mut event = Annotated::new(Event {
990            extra: {
991                let mut map = Object::new();
992                map.insert(
993                    "myvalue".to_owned(),
994                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
995                );
996                Annotated::new(map)
997            },
998            ..Default::default()
999        });
1000
1001        let mut processor = PiiProcessor::new(config.compiled());
1002        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1003        assert_annotated_snapshot!(event);
1004    }
1005
1006    #[test]
1007    fn test_ignore_user_agent_ip_scrubbing() {
1008        let mut data = Event::from_value(
1009            json!({
1010                "request": {
1011                    "headers": [
1012                        ["User-Agent", "127.0.0.1"],
1013                        ["X-Client-Ip", "10.0.0.1"]
1014                    ]
1015                },
1016            })
1017            .into(),
1018        );
1019
1020        let scrubbing_config = DataScrubbingConfig {
1021            scrub_data: true,
1022            scrub_ip_addresses: true,
1023            scrub_defaults: true,
1024            ..Default::default()
1025        };
1026
1027        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1028        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1029
1030        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1031
1032        assert_annotated_snapshot!(&data);
1033    }
1034
1035    #[test]
1036    fn test_remove_debugmeta_path() {
1037        let config = serde_json::from_str::<PiiConfig>(
1038            r#"
1039            {
1040                "applications": {
1041                    "debug_meta.images.*.code_file": ["@anything:remove"],
1042                    "debug_meta.images.*.debug_file": ["@anything:remove"]
1043                }
1044            }
1045            "#,
1046        )
1047        .unwrap();
1048
1049        let mut event = Annotated::new(Event {
1050            debug_meta: Annotated::new(DebugMeta {
1051                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1052                    NativeDebugImage {
1053                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1054                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1055                        debug_id: Annotated::new(
1056                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1057                        ),
1058                        debug_file: Annotated::new("wntdll.pdb".into()),
1059                        debug_checksum: Annotated::empty(),
1060                        arch: Annotated::new("arm64".to_owned()),
1061                        image_addr: Annotated::new(Addr(0)),
1062                        image_size: Annotated::new(4096),
1063                        image_vmaddr: Annotated::new(Addr(32768)),
1064                        other: {
1065                            let mut map = Object::new();
1066                            map.insert(
1067                                "other".to_owned(),
1068                                Annotated::new(Value::String("value".to_owned())),
1069                            );
1070                            map
1071                        },
1072                    },
1073                )))]),
1074                ..Default::default()
1075            }),
1076            ..Default::default()
1077        });
1078
1079        let mut processor = PiiProcessor::new(config.compiled());
1080        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1081        assert_annotated_snapshot!(event);
1082    }
1083
1084    #[test]
1085    fn test_replace_debugmeta_path() {
1086        let config = serde_json::from_str::<PiiConfig>(
1087            r#"
1088            {
1089                "applications": {
1090                    "debug_meta.images.*.code_file": ["@anything:replace"],
1091                    "debug_meta.images.*.debug_file": ["@anything:replace"]
1092                }
1093            }
1094            "#,
1095        )
1096        .unwrap();
1097
1098        let mut event = Annotated::new(Event {
1099            debug_meta: Annotated::new(DebugMeta {
1100                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1101                    NativeDebugImage {
1102                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1103                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1104                        debug_id: Annotated::new(
1105                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1106                        ),
1107                        debug_file: Annotated::new("wntdll.pdb".into()),
1108                        debug_checksum: Annotated::empty(),
1109                        arch: Annotated::new("arm64".to_owned()),
1110                        image_addr: Annotated::new(Addr(0)),
1111                        image_size: Annotated::new(4096),
1112                        image_vmaddr: Annotated::new(Addr(32768)),
1113                        other: {
1114                            let mut map = Object::new();
1115                            map.insert(
1116                                "other".to_owned(),
1117                                Annotated::new(Value::String("value".to_owned())),
1118                            );
1119                            map
1120                        },
1121                    },
1122                )))]),
1123                ..Default::default()
1124            }),
1125            ..Default::default()
1126        });
1127
1128        let mut processor = PiiProcessor::new(config.compiled());
1129        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1130        assert_annotated_snapshot!(event);
1131    }
1132
1133    #[test]
1134    fn test_hash_debugmeta_path() {
1135        let config = serde_json::from_str::<PiiConfig>(
1136            r#"
1137            {
1138                "applications": {
1139                    "debug_meta.images.*.code_file": ["@anything:hash"],
1140                    "debug_meta.images.*.debug_file": ["@anything:hash"]
1141                }
1142            }
1143            "#,
1144        )
1145        .unwrap();
1146
1147        let mut event = Annotated::new(Event {
1148            debug_meta: Annotated::new(DebugMeta {
1149                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1150                    NativeDebugImage {
1151                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1152                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1153                        debug_id: Annotated::new(
1154                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1155                        ),
1156                        debug_file: Annotated::new("wntdll.pdb".into()),
1157                        debug_checksum: Annotated::empty(),
1158                        arch: Annotated::new("arm64".to_owned()),
1159                        image_addr: Annotated::new(Addr(0)),
1160                        image_size: Annotated::new(4096),
1161                        image_vmaddr: Annotated::new(Addr(32768)),
1162                        other: {
1163                            let mut map = Object::new();
1164                            map.insert(
1165                                "other".to_owned(),
1166                                Annotated::new(Value::String("value".to_owned())),
1167                            );
1168                            map
1169                        },
1170                    },
1171                )))]),
1172                ..Default::default()
1173            }),
1174            ..Default::default()
1175        });
1176
1177        let mut processor = PiiProcessor::new(config.compiled());
1178        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1179        assert_annotated_snapshot!(event);
1180    }
1181
1182    #[test]
1183    fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1184        let config = serde_json::from_str::<PiiConfig>(
1185            r#"
1186            {
1187                "applications": {
1188                    "$string": ["@anything:remove"],
1189                    "**": ["@anything:remove"],
1190                    "debug_meta.**": ["@anything:remove"],
1191                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1192                }
1193            }
1194            "#,
1195        )
1196        .unwrap();
1197
1198        let mut event = Annotated::new(Event {
1199            debug_meta: Annotated::new(DebugMeta {
1200                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1201                    NativeDebugImage {
1202                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1203                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1204                        debug_id: Annotated::new(
1205                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1206                        ),
1207                        debug_file: Annotated::new("wntdll.pdb".into()),
1208                        debug_checksum: Annotated::empty(),
1209                        arch: Annotated::new("arm64".to_owned()),
1210                        image_addr: Annotated::new(Addr(0)),
1211                        image_size: Annotated::new(4096),
1212                        image_vmaddr: Annotated::new(Addr(32768)),
1213                        other: {
1214                            let mut map = Object::new();
1215                            map.insert(
1216                                "other".to_owned(),
1217                                Annotated::new(Value::String("value".to_owned())),
1218                            );
1219                            map
1220                        },
1221                    },
1222                )))]),
1223                ..Default::default()
1224            }),
1225            ..Default::default()
1226        });
1227
1228        let mut processor = PiiProcessor::new(config.compiled());
1229        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1230        assert_annotated_snapshot!(event);
1231    }
1232
1233    #[test]
1234    fn test_quoted_keys() {
1235        let config = serde_json::from_str::<PiiConfig>(
1236            r#"
1237            {
1238                "applications": {
1239                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1240                }
1241            }
1242            "#,
1243        )
1244        .unwrap();
1245
1246        let mut event = Annotated::new(Event {
1247            extra: {
1248                let mut map = Object::new();
1249                map.insert(
1250                    "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1251                    Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1252                );
1253                map.insert(
1254                    "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1255                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1256                );
1257                Annotated::new(map)
1258            },
1259            ..Default::default()
1260        });
1261
1262        let mut processor = PiiProcessor::new(config.compiled());
1263        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1264        assert_annotated_snapshot!(event);
1265    }
1266
1267    #[test]
1268    fn test_logentry_value_types() {
1269        // Assert that logentry.formatted is addressable as $string, $message and $logentry.formatted.
1270        for formatted_selector in &[
1271            "$logentry.formatted",
1272            "$message",
1273            "$logentry.formatted && $message",
1274            "$string",
1275        ] {
1276            let config = serde_json::from_str::<PiiConfig>(&format!(
1277                r##"
1278                {{
1279                    "applications": {{
1280                        "{formatted_selector}": ["@anything:remove"]
1281                    }}
1282                }}
1283                "##
1284            ))
1285            .unwrap();
1286
1287            let mut event = Annotated::new(Event {
1288                logentry: Annotated::new(LogEntry {
1289                    formatted: Annotated::new("Hello world!".to_owned().into()),
1290                    ..Default::default()
1291                }),
1292                ..Default::default()
1293            });
1294
1295            let mut processor = PiiProcessor::new(config.compiled());
1296            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1297            assert!(
1298                event
1299                    .value()
1300                    .unwrap()
1301                    .logentry
1302                    .value()
1303                    .unwrap()
1304                    .formatted
1305                    .value()
1306                    .is_none()
1307            );
1308        }
1309    }
1310
1311    #[test]
1312    fn test_logentry_formatted_never_fully_filtered() {
1313        // Test that logentry.formatted gets smart PII scrubbing via to_pii_config
1314        // and is never completely filtered even with aggressive PII rules
1315        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1316            scrub_data: true,
1317            scrub_defaults: true,
1318            scrub_ip_addresses: true,
1319            ..Default::default()
1320        })
1321        .unwrap()
1322        .unwrap();
1323
1324        let mut event = Annotated::new(Event {
1325            logentry: Annotated::new(LogEntry {
1326                formatted: Annotated::new(
1327                    "User john.doe@company.com failed login with card 4111-1111-1111-1111"
1328                        .to_owned()
1329                        .into(),
1330                ),
1331                ..Default::default()
1332            }),
1333            ..Default::default()
1334        });
1335
1336        let mut processor = PiiProcessor::new(config.compiled());
1337        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1338        assert_annotated_snapshot!(event, @r#"
1339        {
1340          "logentry": {
1341            "formatted": "User [email] failed login with card [creditcard]"
1342          },
1343          "_meta": {
1344            "logentry": {
1345              "formatted": {
1346                "": {
1347                  "rem": [
1348                    [
1349                      "@email:replace",
1350                      "s",
1351                      5,
1352                      12
1353                    ],
1354                    [
1355                      "@creditcard:replace",
1356                      "s",
1357                      36,
1358                      48
1359                    ]
1360                  ],
1361                  "len": 68
1362                }
1363              }
1364            }
1365          }
1366        }
1367        "#);
1368    }
1369
1370    #[test]
1371    fn test_logentry_formatted_bearer_token_scrubbing() {
1372        // Test that bearer tokens are properly scrubbed in logentry.formatted
1373        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1374            scrub_data: true,
1375            scrub_defaults: true,
1376            ..Default::default()
1377        })
1378        .unwrap()
1379        .unwrap();
1380
1381        let mut event = Annotated::new(Event {
1382            logentry: Annotated::new(LogEntry {
1383                formatted: Annotated::new(
1384                    "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1385                        .to_owned()
1386                        .into(),
1387                ),
1388                ..Default::default()
1389            }),
1390            ..Default::default()
1391        });
1392
1393        let mut processor = PiiProcessor::new(config.compiled());
1394        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1395        assert_annotated_snapshot!(event, @r#"
1396        {
1397          "logentry": {
1398            "formatted": "API request failed with Bearer [token] and other data"
1399          },
1400          "_meta": {
1401            "logentry": {
1402              "formatted": {
1403                "": {
1404                  "rem": [
1405                    [
1406                      "@bearer:replace",
1407                      "s",
1408                      24,
1409                      38
1410                    ]
1411                  ],
1412                  "len": 63
1413                }
1414              }
1415            }
1416          }
1417        }
1418        "#);
1419    }
1420
1421    #[test]
1422    fn test_logentry_formatted_password_word_not_scrubbed() {
1423        let config = PiiConfig::default();
1424        let mut event = Annotated::new(Event {
1425            logentry: Annotated::new(LogEntry {
1426                formatted: Annotated::new(
1427                    "User password is secret123 for authentication"
1428                        .to_owned()
1429                        .into(),
1430                ),
1431                ..Default::default()
1432            }),
1433            ..Default::default()
1434        });
1435
1436        let mut processor = PiiProcessor::new(config.compiled());
1437        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1438        assert_annotated_snapshot!(event, @r#"
1439        {
1440          "logentry": {
1441            "formatted": "User password is secret123 for authentication"
1442          }
1443        }
1444        "#);
1445    }
1446
1447    #[test]
1448    fn test_ip_address_hashing() {
1449        let config = serde_json::from_str::<PiiConfig>(
1450            r#"
1451            {
1452                "applications": {
1453                    "$user.ip_address": ["@ip:hash"]
1454                }
1455            }
1456            "#,
1457        )
1458        .unwrap();
1459
1460        let mut event = Annotated::new(Event {
1461            user: Annotated::new(User {
1462                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1463                ..Default::default()
1464            }),
1465            ..Default::default()
1466        });
1467
1468        let mut processor = PiiProcessor::new(config.compiled());
1469        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1470
1471        let user = event.value().unwrap().user.value().unwrap();
1472
1473        assert!(user.ip_address.value().is_none());
1474
1475        assert_eq!(
1476            user.id.value().unwrap().as_str(),
1477            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1478        );
1479    }
1480
1481    #[test]
1482    fn test_ip_address_hashing_does_not_overwrite_id() {
1483        let config = serde_json::from_str::<PiiConfig>(
1484            r#"
1485            {
1486                "applications": {
1487                    "$user.ip_address": ["@ip:hash"]
1488                }
1489            }
1490            "#,
1491        )
1492        .unwrap();
1493
1494        let mut event = Annotated::new(Event {
1495            user: Annotated::new(User {
1496                id: Annotated::new("123".to_owned().into()),
1497                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1498                ..Default::default()
1499            }),
1500            ..Default::default()
1501        });
1502
1503        let mut processor = PiiProcessor::new(config.compiled());
1504        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1505
1506        let user = event.value().unwrap().user.value().unwrap();
1507
1508        // This will get wiped out in renormalization though
1509        assert_eq!(
1510            user.ip_address.value().unwrap().as_str(),
1511            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1512        );
1513
1514        assert_eq!(user.id.value().unwrap().as_str(), "123");
1515    }
1516
1517    #[test]
1518    fn test_replace_replaced_text() {
1519        let chunks = vec![Chunk::Redaction {
1520            text: "[ip]".into(),
1521            rule_id: "@ip".into(),
1522            ty: RemarkType::Substituted,
1523        }];
1524        let rule = RuleRef {
1525            id: "@ip:replace".into(),
1526            origin: "@ip".into(),
1527            ty: RuleType::Ip,
1528            redaction: Redaction::Replace(ReplaceRedaction {
1529                text: "[ip]".into(),
1530            }),
1531        };
1532        let res = apply_regex_to_chunks(
1533            chunks.clone(),
1534            &rule,
1535            &Regex::new(r#".*"#).unwrap(),
1536            ReplaceBehavior::Value,
1537        );
1538        assert_eq!(chunks, res);
1539    }
1540
1541    #[test]
1542    fn test_replace_replaced_text_anything() {
1543        let chunks = vec![Chunk::Redaction {
1544            text: "[Filtered]".into(),
1545            rule_id: "@password:filter".into(),
1546            ty: RemarkType::Substituted,
1547        }];
1548        let rule = RuleRef {
1549            id: "@anything:filter".into(),
1550            origin: "@anything:filter".into(),
1551            ty: RuleType::Anything,
1552            redaction: Redaction::Replace(ReplaceRedaction {
1553                text: "[Filtered]".into(),
1554            }),
1555        };
1556        let res = apply_regex_to_chunks(
1557            chunks.clone(),
1558            &rule,
1559            &Regex::new(r#".*"#).unwrap(),
1560            ReplaceBehavior::Groups(smallvec::smallvec![0]),
1561        );
1562        assert_eq!(chunks, res);
1563    }
1564
1565    #[test]
1566    fn test_trace_route_params_scrubbed() {
1567        let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1568            r#"
1569            {
1570                "type": "trace",
1571                "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1572                "span_id": "fa90fdead5f74052",
1573                "data": {
1574                    "previousRoute": {
1575                        "params": {
1576                            "password": "test"
1577                        }
1578                    }
1579                }
1580            }
1581            "#,
1582        )
1583        .unwrap();
1584
1585        let ds_config = DataScrubbingConfig {
1586            scrub_data: true,
1587            scrub_defaults: true,
1588            ..Default::default()
1589        };
1590        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1591        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1592
1593        process_value(
1594            &mut trace_context,
1595            &mut pii_processor,
1596            ProcessingState::root(),
1597        )
1598        .unwrap();
1599        assert_annotated_snapshot!(trace_context);
1600    }
1601
1602    #[test]
1603    fn test_scrub_span_data_http_not_scrubbed() {
1604        let mut span: Annotated<Span> = Annotated::from_json(
1605            r#"{
1606                "data": {
1607                    "http": {
1608                        "query": "dance=true"
1609                    }
1610                }
1611            }"#,
1612        )
1613        .unwrap();
1614
1615        let ds_config = DataScrubbingConfig {
1616            scrub_data: true,
1617            scrub_defaults: true,
1618            ..Default::default()
1619        };
1620        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1621        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1622
1623        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1624        assert_annotated_snapshot!(span);
1625    }
1626
1627    #[test]
1628    fn test_scrub_span_data_http_strings_are_scrubbed() {
1629        let mut span: Annotated<Span> = Annotated::from_json(
1630            r#"{
1631                "data": {
1632                    "http": {
1633                        "query": "ccnumber=5105105105105100&process_id=123",
1634                        "fragment": "ccnumber=5105105105105100,process_id=123"
1635                    }
1636                }
1637            }"#,
1638        )
1639        .unwrap();
1640
1641        let ds_config = DataScrubbingConfig {
1642            scrub_data: true,
1643            scrub_defaults: true,
1644            ..Default::default()
1645        };
1646        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1647        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1648
1649        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1650        assert_annotated_snapshot!(span);
1651    }
1652
1653    #[test]
1654    fn test_scrub_span_data_http_objects_are_scrubbed() {
1655        let mut span: Annotated<Span> = Annotated::from_json(
1656            r#"{
1657                "data": {
1658                    "http": {
1659                        "query": {
1660                            "ccnumber": "5105105105105100",
1661                            "process_id": "123"
1662                        },
1663                        "fragment": {
1664                            "ccnumber": "5105105105105100",
1665                            "process_id": "123"
1666                        }
1667                    }
1668                }
1669            }"#,
1670        )
1671        .unwrap();
1672
1673        let ds_config = DataScrubbingConfig {
1674            scrub_data: true,
1675            scrub_defaults: true,
1676            ..Default::default()
1677        };
1678        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1679        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1680
1681        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1682        assert_annotated_snapshot!(span);
1683    }
1684
1685    #[test]
1686    fn test_scrub_span_data_untyped_props_are_scrubbed() {
1687        let mut span: Annotated<Span> = Annotated::from_json(
1688            r#"{
1689                "data": {
1690                    "untyped": "ccnumber=5105105105105100",
1691                    "more_untyped": {
1692                        "typed": "no",
1693                        "scrubbed": "yes",
1694                        "ccnumber": "5105105105105100"
1695                    }
1696                }
1697            }"#,
1698        )
1699        .unwrap();
1700
1701        let ds_config = DataScrubbingConfig {
1702            scrub_data: true,
1703            scrub_defaults: true,
1704            ..Default::default()
1705        };
1706        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1707        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1708
1709        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1710        assert_annotated_snapshot!(span);
1711    }
1712
1713    #[test]
1714    fn test_span_data_pii() {
1715        let mut span = Span::from_value(
1716            json!({
1717                "data": {
1718                    "code.filepath": "src/sentry/api/authentication.py",
1719                }
1720            })
1721            .into(),
1722        );
1723
1724        let ds_config = DataScrubbingConfig {
1725            scrub_data: true,
1726            scrub_defaults: true,
1727            ..Default::default()
1728        };
1729        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1730
1731        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1732        processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1733        assert_eq!(
1734            get_value!(span.data.code_filepath!).as_str(),
1735            Some("src/sentry/api/authentication.py")
1736        );
1737    }
1738
1739    #[test]
1740    fn test_csp_source_file_pii() {
1741        let mut event = Event::from_value(
1742            json!({
1743                "csp": {
1744                    "source_file": "authentication.js",
1745                }
1746            })
1747            .into(),
1748        );
1749
1750        let config = serde_json::from_str::<PiiConfig>(
1751            r#"
1752            {
1753                "applications": {
1754                    "csp.source_file": ["@anything:filter"]
1755                }
1756            }
1757            "#,
1758        )
1759        .unwrap();
1760
1761        let mut pii_processor = PiiProcessor::new(config.compiled());
1762        processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1763        assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1764    }
1765
1766    #[test]
1767    fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1768        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1769            r#"{
1770                "data": {
1771                    "http": {
1772                        "query": "dance=true"
1773                    }
1774                }
1775            }"#,
1776        )
1777        .unwrap();
1778
1779        let ds_config = DataScrubbingConfig {
1780            scrub_data: true,
1781            scrub_defaults: true,
1782            ..Default::default()
1783        };
1784        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1785        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1786        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1787        assert_annotated_snapshot!(breadcrumb);
1788    }
1789
1790    #[test]
1791    fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1792        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1793            r#"{
1794                "data": {
1795                    "http": {
1796                        "query": "ccnumber=5105105105105100&process_id=123",
1797                        "fragment": "ccnumber=5105105105105100,process_id=123"
1798                    }
1799                }
1800            }"#,
1801        )
1802        .unwrap();
1803
1804        let ds_config = DataScrubbingConfig {
1805            scrub_data: true,
1806            scrub_defaults: true,
1807            ..Default::default()
1808        };
1809        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1810        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1811        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1812        assert_annotated_snapshot!(breadcrumb);
1813    }
1814
1815    #[test]
1816    fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1817        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1818            r#"{
1819                "data": {
1820                    "http": {
1821                        "query": {
1822                            "ccnumber": "5105105105105100",
1823                            "process_id": "123"
1824                        },
1825                        "fragment": {
1826                            "ccnumber": "5105105105105100",
1827                            "process_id": "123"
1828                        }
1829                    }
1830                }
1831            }"#,
1832        )
1833        .unwrap();
1834
1835        let ds_config = DataScrubbingConfig {
1836            scrub_data: true,
1837            scrub_defaults: true,
1838            ..Default::default()
1839        };
1840        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1841        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1842
1843        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1844        assert_annotated_snapshot!(breadcrumb);
1845    }
1846
1847    #[test]
1848    fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1849        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1850            r#"{
1851                "data": {
1852                    "untyped": "ccnumber=5105105105105100",
1853                    "more_untyped": {
1854                        "typed": "no",
1855                        "scrubbed": "yes",
1856                        "ccnumber": "5105105105105100"
1857                    }
1858                }
1859            }"#,
1860        )
1861        .unwrap();
1862
1863        let ds_config = DataScrubbingConfig {
1864            scrub_data: true,
1865            scrub_defaults: true,
1866            ..Default::default()
1867        };
1868        let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1869        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1870        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1871        assert_annotated_snapshot!(breadcrumb);
1872    }
1873
1874    #[test]
1875    fn test_scrub_graphql_response_data_with_variables() {
1876        let mut data = Event::from_value(
1877            json!({
1878              "request": {
1879                "data": {
1880                  "query": "{\n  viewer {\n    login\n  }\n}",
1881                  "variables": {
1882                    "login": "foo"
1883                  }
1884                },
1885                "api_target": "graphql"
1886              },
1887              "contexts": {
1888                "response": {
1889                  "type": "response",
1890                  "data": {
1891                    "data": {
1892                      "viewer": {
1893                        "login": "foo"
1894                      }
1895                    }
1896                  }
1897                }
1898              }
1899            })
1900            .into(),
1901        );
1902
1903        scrub_graphql(data.value_mut().as_mut().unwrap());
1904
1905        assert_debug_snapshot!(&data);
1906    }
1907
1908    #[test]
1909    fn test_scrub_graphql_response_data_without_variables() {
1910        let mut data = Event::from_value(
1911            json!({
1912              "request": {
1913                "data": {
1914                  "query": "{\n  viewer {\n    login\n  }\n}"
1915                },
1916                "api_target": "graphql"
1917              },
1918              "contexts": {
1919                "response": {
1920                  "type": "response",
1921                  "data": {
1922                    "data": {
1923                      "viewer": {
1924                        "login": "foo"
1925                      }
1926                    }
1927                  }
1928                }
1929              }
1930            })
1931            .into(),
1932        );
1933
1934        scrub_graphql(data.value_mut().as_mut().unwrap());
1935        assert_debug_snapshot!(&data);
1936    }
1937
1938    #[test]
1939    fn test_does_not_scrub_if_no_graphql() {
1940        let mut data = Event::from_value(
1941            json!({
1942              "request": {
1943                "data": {
1944                  "query": "{\n  viewer {\n    login\n  }\n}",
1945                  "variables": {
1946                    "login": "foo"
1947                  }
1948                },
1949              },
1950              "contexts": {
1951                "response": {
1952                  "type": "response",
1953                  "data": {
1954                    "data": {
1955                      "viewer": {
1956                        "login": "foo"
1957                      }
1958                    }
1959                  }
1960                }
1961              }
1962            })
1963            .into(),
1964        );
1965
1966        let scrubbing_config = DataScrubbingConfig {
1967            scrub_data: true,
1968            scrub_ip_addresses: true,
1969            scrub_defaults: true,
1970            ..Default::default()
1971        };
1972
1973        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1974        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1975
1976        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1977
1978        assert_debug_snapshot!(&data);
1979    }
1980
1981    #[test]
1982    fn test_logentry_params_scrubbed() {
1983        let config = serde_json::from_str::<PiiConfig>(
1984            r##"
1985                {
1986                    "applications": {
1987                        "$string": ["@anything:remove"]
1988                    }
1989                }
1990                "##,
1991        )
1992        .unwrap();
1993
1994        let mut event = Annotated::new(Event {
1995            logentry: Annotated::new(LogEntry {
1996                message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
1997                formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
1998                params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
1999                    "12345".to_owned(),
2000                ))])),
2001                ..Default::default()
2002            }),
2003            ..Default::default()
2004        });
2005
2006        let mut processor = PiiProcessor::new(config.compiled());
2007        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2008
2009        let params = get_value!(event.logentry.params!);
2010        assert_debug_snapshot!(params, @r###"
2011        Array(
2012            [
2013                Meta {
2014                    remarks: [
2015                        Remark {
2016                            ty: Removed,
2017                            rule_id: "@anything:remove",
2018                            range: None,
2019                        },
2020                    ],
2021                    errors: [],
2022                    original_length: None,
2023                    original_value: None,
2024                },
2025            ],
2026        )
2027        "###);
2028    }
2029
2030    #[test]
2031    fn test_is_pairlist() {
2032        for (case, expected) in [
2033            (r#"[]"#, false),
2034            (r#"["foo"]"#, false),
2035            (r#"["foo", 123]"#, false),
2036            (r#"[[1, "foo"]]"#, false),
2037            (r#"[[["too_nested", 123]]]"#, false),
2038            (r#"[["foo", "bar"], [1, "foo"]]"#, false),
2039            (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
2040            (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
2041            (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
2042            (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
2043            (r#"[["foo", 123]]"#, true),
2044            (r#"[["foo", "bar"]]"#, true),
2045            (
2046                r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
2047                true,
2048            ),
2049        ] {
2050            let v = Annotated::<Value>::from_json(case).unwrap();
2051            let Annotated(Some(Value::Array(mut a)), _) = v else {
2052                panic!()
2053            };
2054            assert_eq!(is_pairlist(&mut a), expected, "{case}");
2055        }
2056    }
2057
2058    #[test]
2059    fn test_tuple_array_scrubbed_with_path_selector() {
2060        // We expect that both of these configs express the same semantics.
2061        let configs = vec![
2062            // This configuration matches on the authorization element (the 1st element of the array
2063            // represents the key).
2064            r##"
2065                {
2066                    "applications": {
2067                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
2068                    }
2069                }
2070                "##,
2071            // This configuration matches on the 2nd element of the array.
2072            r##"
2073                {
2074                    "applications": {
2075                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
2076                    }
2077                }
2078                "##,
2079        ];
2080
2081        let mut event = Event::from_value(
2082            serde_json::json!(
2083            {
2084              "message": "hi",
2085              "exception": {
2086                "values": [
2087                  {
2088                    "type": "BrokenException",
2089                    "value": "Something failed",
2090                    "stacktrace": {
2091                      "frames": [
2092                        {
2093                            "vars": {
2094                                "headers": [
2095                                    ["authorization", "Bearer abc123"]
2096                                ]
2097                            }
2098                        }
2099                      ]
2100                    }
2101                  }
2102                ]
2103              }
2104            })
2105            .into(),
2106        );
2107
2108        for config in configs {
2109            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
2110            let mut processor = PiiProcessor::new(config.compiled());
2111            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2112
2113            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2114
2115            allow_duplicates!(assert_debug_snapshot!(vars, @r###"
2116                              FrameVars(
2117                                  {
2118                                      "headers": Array(
2119                                          [
2120                                              Array(
2121                                                  [
2122                                                      String(
2123                                                          "authorization",
2124                                                      ),
2125                                                      Annotated(
2126                                                          String(
2127                                                              "[Filtered]",
2128                                                          ),
2129                                                          Meta {
2130                                                              remarks: [
2131                                                                  Remark {
2132                                                                      ty: Substituted,
2133                                                                      rule_id: "@anything:replace",
2134                                                                      range: Some(
2135                                                                          (
2136                                                                              0,
2137                                                                              10,
2138                                                                          ),
2139                                                                      ),
2140                                                                  },
2141                                                              ],
2142                                                              errors: [],
2143                                                              original_length: Some(
2144                                                                  13,
2145                                                              ),
2146                                                              original_value: None,
2147                                                          },
2148                                                      ),
2149                                                  ],
2150                                              ),
2151                                          ],
2152                                      ),
2153                                  },
2154                              )
2155                              "###));
2156        }
2157    }
2158
2159    #[test]
2160    fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2161        let config = serde_json::from_str::<PiiConfig>(
2162            r##"
2163                {
2164                    "applications": {
2165                        "$string": ["@password:remove"]
2166                    }
2167                }
2168                "##,
2169        )
2170        .unwrap();
2171
2172        let mut event = Event::from_value(
2173            serde_json::json!(
2174            {
2175              "message": "hi",
2176              "exception": {
2177                "values": [
2178                  {
2179                    "type": "BrokenException",
2180                    "value": "Something failed",
2181                    "stacktrace": {
2182                      "frames": [
2183                        {
2184                            "vars": {
2185                                "headers": [
2186                                    ["authorization", "abc123"]
2187                                ]
2188                            }
2189                        }
2190                      ]
2191                    }
2192                  }
2193                ]
2194              }
2195            })
2196            .into(),
2197        );
2198
2199        let mut processor = PiiProcessor::new(config.compiled());
2200        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2201
2202        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2203
2204        assert_debug_snapshot!(vars, @r###"
2205        FrameVars(
2206            {
2207                "headers": Array(
2208                    [
2209                        Array(
2210                            [
2211                                String(
2212                                    "authorization",
2213                                ),
2214                                Meta {
2215                                    remarks: [
2216                                        Remark {
2217                                            ty: Removed,
2218                                            rule_id: "@password:remove",
2219                                            range: None,
2220                                        },
2221                                    ],
2222                                    errors: [],
2223                                    original_length: None,
2224                                    original_value: None,
2225                                },
2226                            ],
2227                        ),
2228                    ],
2229                ),
2230            },
2231        )
2232        "###);
2233    }
2234}