Skip to main content

relay_pii/
processor.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8    self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9    ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12    AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Controls how scrubbing rules are applied to attributes.
///
/// The mode is a plain, copyable flag; it can be compared with `==` and `!=`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttributeMode {
    /// Treat the attribute as an object and allow rules to refer to its
    /// individual fields.
    Object,
    /// Identify the attribute with its value and apply all rules to that
    /// value directly.
    ValueOnly,
}
32
/// A processor that performs PII stripping.
///
/// The processor borrows its compiled configuration for the lifetime `'a` and applies the
/// rules found in it to the values it visits.
pub struct PiiProcessor<'a> {
    /// Controls how rules are applied to attributes.
    attribute_mode: AttributeMode,
    /// The compiled scrubbing configuration whose rule applications are evaluated.
    compiled_config: &'a CompiledPiiConfig,
}
39
40impl<'a> PiiProcessor<'a> {
41    /// Creates a new processor based on a config.
42    pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43        // this constructor needs to be cheap... a new PiiProcessor is created for each event. Move
44        // any init logic into CompiledPiiConfig::new.
45        PiiProcessor {
46            compiled_config,
47            attribute_mode: AttributeMode::Object,
48        }
49    }
50
51    /// Sets an `AttributeMode` on this processor.
52    pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53        self.attribute_mode = attribute_mode;
54        self
55    }
56
57    fn apply_all_rules(
58        &self,
59        meta: &mut Meta,
60        state: &ProcessingState<'_>,
61        mut value: Option<&mut String>,
62    ) -> ProcessingResult {
63        let pii = state.pii();
64        if pii == Pii::False {
65            return Ok(());
66        }
67
68        for (selector, rules) in self.compiled_config.applications.iter() {
69            if selector.matches_path(&state.path()) {
70                #[allow(clippy::needless_option_as_deref)]
71                for rule in rules {
72                    let reborrowed_value = value.as_deref_mut();
73                    apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74                }
75            }
76        }
77
78        Ok(())
79    }
80}
81
82impl Processor for PiiProcessor<'_> {
83    fn before_process<T: ProcessValue>(
84        &mut self,
85        value: Option<&T>,
86        meta: &mut Meta,
87        state: &ProcessingState<'_>,
88    ) -> ProcessingResult {
89        if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90            // Also apply pii scrubbing to the original value (set by normalization or other processors),
91            // such that we do not leak sensitive data through meta. Deletes `original_value` if an Error
92            // value is returned.
93            if let Some(parent) = state.iter().next() {
94                let path = state.path();
95                let new_state = parent.enter_borrowed(
96                    path.key().unwrap_or(""),
97                    Some(Cow::Borrowed(state.attrs())),
98                    enum_set!(ValueType::String),
99                );
100
101                if self
102                    .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103                    .is_err()
104                {
105                    // `apply_all_rules` returned `DeleteValueHard` or `DeleteValueSoft`, so delete the original as well.
106                    meta.set_original_value(Option::<String>::None);
107                }
108            }
109        }
110
111        // booleans cannot be PII, and strings are handled in process_string
112        if state.value_type().contains(ValueType::Boolean)
113            || state.value_type().contains(ValueType::String)
114        {
115            return Ok(());
116        }
117
118        if value.is_none() {
119            return Ok(());
120        }
121
122        // apply rules based on key/path
123        self.apply_all_rules(meta, state, None)
124    }
125
126    fn process_array<T>(
127        &mut self,
128        array: &mut Array<T>,
129        _meta: &mut Meta,
130        state: &ProcessingState<'_>,
131    ) -> ProcessingResult
132    where
133        T: ProcessValue,
134    {
135        if is_pairlist(array) {
136            for annotated in array {
137                let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139                if let Some(Value::Array(pair)) = mapped.value_mut() {
140                    let mut value = mem::take(&mut pair[1]);
141                    let value_type = ValueType::for_field(&value);
142
143                    if let Some(key_name) = &pair[0].as_str() {
144                        // We enter the key of the first element of the array, since we treat it
145                        // as a pair.
146                        let key_state =
147                            state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148                        // We process the value with a state that "simulates" the first value of the
149                        // array as if it was the key of a dictionary.
150                        process_value(&mut value, self, &key_state)?;
151                    }
152
153                    // Put value back into pair.
154                    pair[1] = value;
155                }
156
157                // Put pair back into array.
158                *annotated = T::from_value(mapped);
159            }
160
161            Ok(())
162        } else {
163            // If we didn't find a pairlist, we can process child values as normal.
164            array.process_child_values(self, state)
165        }
166    }
167
168    fn process_string(
169        &mut self,
170        value: &mut String,
171        meta: &mut Meta,
172        state: &ProcessingState<'_>,
173    ) -> ProcessingResult {
174        if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175            return Ok(());
176        }
177
178        // same as before_process. duplicated here because we can only check for "true",
179        // "false" etc in process_string.
180        self.apply_all_rules(meta, state, Some(value))
181    }
182
183    fn process_native_image_path(
184        &mut self,
185        NativeImagePath(value): &mut NativeImagePath,
186        meta: &mut Meta,
187        state: &ProcessingState<'_>,
188    ) -> ProcessingResult {
189        // In NativeImagePath we must not strip the file's basename because that would break
190        // processing.
191        //
192        // We pop the basename from the end of the string, call process_string and push the
193        // basename again.
194        //
195        // The ranges in Meta should still be right as long as we only pop/push from the end of the
196        // string. If we decide that we need to preserve anything other than suffixes all PII
197        // tooltips/annotations are potentially wrong.
198
199        if let Some(index) = value.rfind(['/', '\\']) {
200            let basename = value.split_off(index);
201            match self.process_string(value, meta, state) {
202                Ok(()) => value.push_str(&basename),
203                Err(
204                    ProcessingAction::DeleteValueHard
205                    | ProcessingAction::DeleteValueWithRemark(_)
206                    | ProcessingAction::DeleteValueSoft,
207                ) => {
208                    basename[1..].clone_into(value);
209                }
210                Err(ProcessingAction::InvalidTransaction(x)) => {
211                    return Err(ProcessingAction::InvalidTransaction(x));
212                }
213            }
214        }
215
216        Ok(())
217    }
218
    /// Processes a `PairList` by delegating to `utils::process_pairlist`.
    ///
    /// The pair list's own `_meta` is not used by this processor.
    fn process_pairlist<T: ProcessValue + AsPair>(
        &mut self,
        value: &mut PairList<T>,
        _meta: &mut Meta,
        state: &ProcessingState,
    ) -> ProcessingResult {
        utils::process_pairlist(self, value, state)
    }
227
228    fn process_attributes(
229        &mut self,
230        value: &mut relay_event_schema::protocol::Attributes,
231        _meta: &mut Meta,
232        state: &ProcessingState,
233    ) -> ProcessingResult {
234        match self.attribute_mode {
235            // Treat each attribute as an object and just process them field by field.
236            AttributeMode::Object => value.process_child_values(self, state),
237            // Identify each attribute with its `value` and only process that.
238            AttributeMode::ValueOnly => {
239                for (key, attribute) in value.0.iter_mut() {
240                    let Some(attribute) = attribute.value_mut() else {
241                        continue;
242                    };
243
244                    // We need some manual state management here because we're bypassing all the
245                    // intermediate structures and pointing at the value directly. This essentially
246                    // mimics the attributes and value type that the metastructure derivation would
247                    // produce for the attribute vaue.
248                    let attrs = FieldAttrs::new()
249                        .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
250                    let inner_value = &mut attribute.value.value;
251                    let inner_value_type = ValueType::for_field(inner_value);
252                    let entered =
253                        state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
254
255                    processor::process_value(inner_value, self, &entered)?;
256                    self.process_other(&mut attribute.other, state)?;
257                }
258                Ok(())
259            }
260        }
261    }
262
263    fn process_user(
264        &mut self,
265        user: &mut User,
266        _meta: &mut Meta,
267        state: &ProcessingState<'_>,
268    ) -> ProcessingResult {
269        let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
270
271        // Recurse into the user and does PII processing on fields.
272        user.process_child_values(self, state)?;
273
274        let has_other_fields = user.id.value().is_some()
275            || user.username.value().is_some()
276            || user.email.value().is_some();
277
278        let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
279
280        // If the IP address has become invalid as part of PII processing, we move it into the user
281        // ID. That ensures people can do IP hashing and still have a correct users-affected count.
282        //
283        // Right now both Snuba and EventUser discard unparseable IPs for indexing, and we assume
284        // we want to keep it that way.
285        //
286        // If there are any other fields set that take priority over the IP for uniquely
287        // identifying a user (has_other_fields), we do not want to do anything. The value will be
288        // wiped out in renormalization anyway.
289        if ip_was_valid && !has_other_fields && !ip_is_still_valid {
290            user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
291            user.ip_address.meta_mut().add_remark(Remark::new(
292                RemarkType::Removed,
293                "pii:ip_address".to_owned(),
294            ));
295        }
296
297        Ok(())
298    }
299
300    // Replay PII processor entry point.
301    fn process_replay(
302        &mut self,
303        replay: &mut Replay,
304        _meta: &mut Meta,
305        state: &ProcessingState<'_>,
306    ) -> ProcessingResult {
307        replay.process_child_values(self, state)?;
308        Ok(())
309    }
310}
311
/// Collects the two facts needed to decide whether a value is a `[key, value]` pair.
///
/// Both flags start out as `false`.
#[derive(Default)]
struct PairListProcessor {
    /// Whether the visited value was an array with the shape of a pair.
    is_pair: bool,
    /// Whether the key position of that array held a string.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Returns true if the processor identified the supplied data as an array composed of
    /// a key (string) and a value, i.e. if both flags are set.
    fn is_pair_array(&self) -> bool {
        matches!(
            self,
            Self {
                is_pair: true,
                has_string_key: true,
            }
        )
    }
}
325
326impl Processor for PairListProcessor {
327    fn process_array<T>(
328        &mut self,
329        value: &mut Array<T>,
330        _meta: &mut Meta,
331        state: &ProcessingState<'_>,
332    ) -> ProcessingResult
333    where
334        T: ProcessValue,
335    {
336        self.is_pair = state.depth() == 0 && value.len() == 2;
337        if self.is_pair {
338            let key_type = ValueType::for_field(&value[0]);
339            process_value(
340                &mut value[0],
341                self,
342                &state.enter_index(0, state.inner_attrs(), key_type),
343            )?;
344        }
345
346        Ok(())
347    }
348
349    fn process_string(
350        &mut self,
351        _value: &mut String,
352        _meta: &mut Meta,
353        state: &ProcessingState<'_>,
354    ) -> ProcessingResult where {
355        if state.depth() == 1 && state.path().index() == Some(0) {
356            self.has_string_key = true;
357        }
358
359        Ok(())
360    }
361}
362
363fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
364    for element in array.iter_mut() {
365        let mut visitor = PairListProcessor::default();
366        process_value(element, &mut visitor, ProcessingState::root()).ok();
367        if !visitor.is_pair_array() {
368            return false;
369        }
370    }
371
372    !array.is_empty()
373}
374
375/// Scrubs GraphQL variables from the event.
376pub fn scrub_graphql(event: &mut Event) {
377    let mut keys: BTreeSet<&str> = BTreeSet::new();
378
379    let mut is_graphql = false;
380
381    // Collect the variables keys and scrub them out.
382    if let Some(request) = event.request.value_mut()
383        && let Some(Value::Object(data)) = request.data.value_mut()
384    {
385        if let Some(api_target) = request.api_target.value()
386            && api_target.eq_ignore_ascii_case("graphql")
387        {
388            is_graphql = true;
389        }
390
391        if is_graphql
392            && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
393        {
394            for (key, value) in variables.iter_mut() {
395                keys.insert(key);
396                value.set_value(Some(Value::String("[Filtered]".to_owned())));
397            }
398        }
399    }
400
401    if !is_graphql {
402        return;
403    }
404
405    // Scrub PII from the data object if they match the variables keys.
406    if let Some(contexts) = event.contexts.value_mut()
407        && let Some(response) = contexts.get_mut::<ResponseContext>()
408        && let Some(Value::Object(data)) = response.data.value_mut()
409        && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
410    {
411        if !keys.is_empty() {
412            scrub_graphql_data(&keys, graphql_data);
413        } else {
414            // If we don't have the variable keys, we scrub the whole data object
415            // because the query or mutation weren't parameterized.
416            data.remove("data");
417        }
418    }
419}
420
421/// Scrubs values from the data object to `[Filtered]`.
422fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
423    for (key, value) in data.iter_mut() {
424        match value.value_mut() {
425            Some(Value::Object(item_data)) => {
426                scrub_graphql_data(keys, item_data);
427            }
428            _ => {
429                if keys.contains(key.as_str()) {
430                    value.set_value(Some(Value::String("[Filtered]".to_owned())));
431                }
432            }
433        }
434    }
435}
436
437fn apply_rule_to_value(
438    meta: &mut Meta,
439    rule: &RuleRef,
440    key: Option<&str>,
441    mut value: Option<&mut String>,
442) -> ProcessingResult {
443    // The rule might specify to remove or to redact. If redaction is chosen, we need to
444    // chunk up the value, otherwise we need to simply mark the value for deletion.
445    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
446
447    // In case the value is not a string (but a container, bool or number) and the rule matches on
448    // anything, we can only remove the value (not replace, hash, etc).
449    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
450        // The value is a container, @anything on a container can do nothing but delete.
451        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
452        return Err(ProcessingAction::DeleteValueHard);
453    }
454
455    macro_rules! apply_regex {
456        ($regex:expr, $replace_behavior:expr) => {
457            if let Some(ref mut value) = value {
458                processor::process_chunked_value(value, meta, |chunks| {
459                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
460                });
461            }
462        };
463    }
464
465    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
466        if matches!(pattern_type, PatternType::Key | PatternType::KeyValue)
467            && key.is_some_and(|key| regex.is_match(key))
468        {
469            if value.is_some() && should_redact_chunks {
470                // If we're given a string value here, redact the value like we would with
471                // @anything.
472                apply_regex!(&ANYTHING_REGEX, replace_behavior);
473            } else {
474                meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
475                return Err(ProcessingAction::DeleteValueHard);
476            }
477        } else if matches!(pattern_type, PatternType::Value | PatternType::KeyValue) {
478            apply_regex!(regex, replace_behavior);
479        }
480    }
481
482    Ok(())
483}
484
485fn apply_regex_to_chunks<'a>(
486    chunks: Vec<Chunk<'a>>,
487    rule: &RuleRef,
488    regex: &Regex,
489    replace_behavior: ReplaceBehavior,
490) -> Vec<Chunk<'a>> {
491    // NB: This function allocates the entire string and all chunks a second time. This means it
492    // cannot reuse chunks and reallocates them. Ideally, we would be able to run the regex directly
493    // on the chunks, but the `regex` crate does not support that.
494
495    let mut search_string = String::new();
496    let mut has_text = false;
497    for chunk in &chunks {
498        match chunk {
499            Chunk::Text { text } => {
500                has_text = true;
501                search_string.push_str(&text.replace('\x00', ""));
502            }
503            Chunk::Redaction { .. } => search_string.push('\x00'),
504        }
505    }
506
507    if !has_text {
508        // Nothing to replace.
509        return chunks;
510    }
511
512    // Early exit if this regex does not match and return the original chunks.
513    let mut captures_iter = regex.captures_iter(&search_string).peekable();
514    if captures_iter.peek().is_none() {
515        return chunks;
516    }
517
518    let mut replacement_chunks = vec![];
519    for chunk in chunks {
520        if let Chunk::Redaction { .. } = chunk {
521            replacement_chunks.push(chunk);
522        }
523    }
524    replacement_chunks.reverse();
525
526    fn process_text<'a>(
527        text: &str,
528        rv: &mut Vec<Chunk<'a>>,
529        replacement_chunks: &mut Vec<Chunk<'a>>,
530    ) {
531        if text.is_empty() {
532            return;
533        }
534
535        // ALERT: This logic assumes that `regex` doesn't match a capture
536        // group starting on a null byte. If you get an error in debug mode
537        // about `replacement_chunks` not being empty, check the regex.
538        static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
539        let regex = NULL_SPLIT_RE.get_or_init(|| {
540            #[allow(clippy::trivial_regex)]
541            Regex::new("\x00").unwrap()
542        });
543
544        let mut pos = 0;
545        for piece in regex.find_iter(text) {
546            rv.push(Chunk::Text {
547                text: Cow::Owned(text[pos..piece.start()].to_string()),
548            });
549            rv.push(replacement_chunks.pop().unwrap());
550            pos = piece.end();
551        }
552
553        rv.push(Chunk::Text {
554            text: Cow::Owned(text[pos..].to_string()),
555        });
556    }
557
558    let mut pos = 0;
559    let mut rv = Vec::with_capacity(replacement_chunks.len());
560
561    match replace_behavior {
562        ReplaceBehavior::Groups(ref groups) => {
563            for m in captures_iter {
564                for (idx, g) in m.iter().enumerate() {
565                    if let Some(g) = g
566                        && groups.contains(&(idx as u8))
567                    {
568                        process_text(
569                            &search_string[pos..g.start()],
570                            &mut rv,
571                            &mut replacement_chunks,
572                        );
573                        insert_replacement_chunks(rule, g.as_str(), &mut rv);
574                        pos = g.end();
575                    }
576                }
577            }
578            process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
579            debug_assert!(replacement_chunks.is_empty());
580        }
581        ReplaceBehavior::Value => {
582            // We only want to replace a string value, and the replacement chunk for that is
583            // inserted by insert_replacement_chunks. Adding chunks from replacement_chunks
584            // results in the incorrect behavior of a total of more chunks than the input.
585            insert_replacement_chunks(rule, &search_string, &mut rv);
586        }
587    }
588    rv
589}
590
591fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
592    match &rule.redaction {
593        Redaction::Default | Redaction::Remove => {
594            output.push(Chunk::Redaction {
595                text: Cow::Borrowed(""),
596                rule_id: Cow::Owned(rule.origin.to_string()),
597                ty: RemarkType::Removed,
598            });
599        }
600        Redaction::Mask => {
601            let buf = vec!['*'; text.chars().count()];
602
603            output.push(Chunk::Redaction {
604                ty: RemarkType::Masked,
605                rule_id: Cow::Owned(rule.origin.to_string()),
606                text: buf.into_iter().collect(),
607            })
608        }
609        Redaction::Hash => {
610            output.push(Chunk::Redaction {
611                ty: RemarkType::Pseudonymized,
612                rule_id: Cow::Owned(rule.origin.to_string()),
613                text: Cow::Owned(utils::hash_value(text.as_bytes())),
614            });
615        }
616        Redaction::Replace(replace) => {
617            output.push(Chunk::Redaction {
618                ty: RemarkType::Substituted,
619                rule_id: Cow::Owned(rule.origin.to_string()),
620                text: Cow::Owned(replace.text.clone()),
621            });
622        }
623        Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
624    }
625}
626
627#[cfg(test)]
628mod tests {
629    use insta::{allow_duplicates, assert_debug_snapshot};
630    use relay_event_schema::processor::process_value;
631    use relay_event_schema::protocol::{
632        Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
633        NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
634    };
635    use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
636    use serde_json::json;
637
638    use super::*;
639    use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
640
641    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
642        use crate::convert::to_pii_config as to_pii_config_impl;
643        let rv = to_pii_config_impl(datascrubbing_config);
644        if let Some(ref config) = rv {
645            let roundtrip: PiiConfig =
646                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
647            assert_eq!(&roundtrip, config);
648        }
649        rv
650    }
651
652    #[test]
653    fn test_scrub_original_value() {
654        let mut data = Event::from_value(
655            json!({
656                "user": {
657                    "username": "hey  man 73.133.27.120", // should be stripped despite not being "known ip field"
658                    "ip_address": "is this an ip address? 73.133.27.120", //  <--------
659                },
660                "hpkp":"invalid data my ip address is  74.133.27.120 and my credit card number is  4571234567890111 ",
661            })
662            .into(),
663        );
664
665        let scrubbing_config = DataScrubbingConfig {
666            scrub_data: true,
667            scrub_ip_addresses: true,
668            scrub_defaults: true,
669            ..Default::default()
670        };
671
672        let pii_config = to_pii_config(&scrubbing_config).unwrap();
673        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
674
675        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
676
677        assert_debug_snapshot!(&data);
678    }
679
680    #[test]
681    fn test_sentry_user() {
682        let mut data = Event::from_value(
683            json!({
684                "user": {
685                    "ip_address": "73.133.27.120",
686                    "sentry_user": "ip:73.133.27.120",
687                },
688            })
689            .into(),
690        );
691
692        let scrubbing_config = DataScrubbingConfig {
693            scrub_data: true,
694            scrub_ip_addresses: true,
695            scrub_defaults: true,
696            ..Default::default()
697        };
698
699        let pii_config = to_pii_config(&scrubbing_config).unwrap();
700        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
701
702        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
703
704        assert_debug_snapshot!(&data);
705    }
706
707    #[test]
708    fn test_basic_stripping() {
709        let config = serde_json::from_str::<PiiConfig>(
710            r#"
711            {
712                "rules": {
713                    "remove_bad_headers": {
714                        "type": "redact_pair",
715                        "keyPattern": "(?i)cookie|secret[-_]?key"
716                    }
717                },
718                "applications": {
719                    "$string": ["@ip"],
720                    "$object.**": ["remove_bad_headers"]
721                }
722            }
723            "#,
724        )
725        .unwrap();
726
727        let mut event = Annotated::new(Event {
728            logentry: Annotated::new(LogEntry {
729                formatted: Annotated::new("Hello world!".to_owned().into()),
730                ..Default::default()
731            }),
732            request: Annotated::new(Request {
733                env: {
734                    let mut rv = Object::new();
735                    rv.insert(
736                        "SECRET_KEY".to_owned(),
737                        Annotated::new(Value::String("134141231231231231231312".into())),
738                    );
739                    Annotated::new(rv)
740                },
741                headers: {
742                    let rv = vec![
743                        Annotated::new((
744                            Annotated::new("Cookie".to_owned().into()),
745                            Annotated::new("super secret".to_owned().into()),
746                        )),
747                        Annotated::new((
748                            Annotated::new("X-Forwarded-For".to_owned().into()),
749                            Annotated::new("127.0.0.1".to_owned().into()),
750                        )),
751                    ];
752                    Annotated::new(Headers(PairList(rv)))
753                },
754                ..Default::default()
755            }),
756            tags: Annotated::new(Tags(
757                vec![Annotated::new(TagEntry(
758                    Annotated::new("forwarded_for".to_owned()),
759                    Annotated::new("127.0.0.1".to_owned()),
760                ))]
761                .into(),
762            )),
763            ..Default::default()
764        });
765
766        let mut processor = PiiProcessor::new(config.compiled());
767        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
768        assert_annotated_snapshot!(event);
769    }
770
771    #[test]
772    fn test_redact_containers() {
773        let config = serde_json::from_str::<PiiConfig>(
774            r#"
775            {
776                "applications": {
777                    "$object": ["@anything"]
778                }
779            }
780            "#,
781        )
782        .unwrap();
783
784        let mut event = Annotated::new(Event {
785            extra: {
786                let mut map = Object::new();
787                map.insert(
788                    "foo".to_owned(),
789                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
790                );
791                Annotated::new(map)
792            },
793            ..Default::default()
794        });
795
796        let mut processor = PiiProcessor::new(config.compiled());
797        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
798        assert_annotated_snapshot!(event);
799    }
800
801    #[test]
802    fn test_redact_custom_pattern() {
803        let config = serde_json::from_str::<PiiConfig>(
804            r#"
805            {
806                "applications": {
807                    "$string": ["myrule"]
808                },
809                "rules": {
810                    "myrule": {
811                        "type": "pattern",
812                        "pattern": "foo",
813                        "redaction": {
814                            "method": "replace",
815                            "text": "asd"
816                        }
817                    }
818                }
819            }
820            "#,
821        )
822        .unwrap();
823
824        let mut event = Annotated::new(Event {
825            extra: {
826                let mut map = Object::new();
827                map.insert(
828                    "myvalue".to_owned(),
829                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
830                );
831                Annotated::new(map)
832            },
833            ..Default::default()
834        });
835
836        let mut processor = PiiProcessor::new(config.compiled());
837        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
838        assert_annotated_snapshot!(event);
839    }
840
841    #[test]
842    fn test_redact_custom_negative_pattern() {
843        let config = serde_json::from_str::<PiiConfig>(
844            r#"
845            {
846                "applications": {
847                    "$string": ["myrule"]
848                },
849                "rules": {
850                    "myrule": {
851                        "type": "pattern",
852                        "pattern": "the good string|.*OK.*|(.*)",
853                        "replaceGroups": [1],
854                        "redaction": {
855                            "method": "mask"
856                        }
857                    }
858                }
859            }
860            "#,
861        )
862        .unwrap();
863
864        let mut event = Annotated::<Event>::from_json(
865            r#"{
866            "extra": {
867                "1": "the good string",
868                "2": "a bad string",
869                "3": "another OK string",
870                "4": "another bad one"
871            }
872        }"#,
873        )
874        .unwrap();
875
876        let mut processor = PiiProcessor::new(config.compiled());
877        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
878        assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
879        {
880          "1": "the good string",
881          "2": "************",
882          "3": "another OK string",
883          "4": "***************",
884          "_meta": {
885            "2": {
886              "": {
887                "rem": [
888                  [
889                    "myrule",
890                    "m",
891                    0,
892                    12
893                  ]
894                ],
895                "len": 12
896              }
897            },
898            "4": {
899              "": {
900                "rem": [
901                  [
902                    "myrule",
903                    "m",
904                    0,
905                    15
906                  ]
907                ],
908                "len": 15
909              }
910            }
911          }
912        }
913        "#);
914    }
915
916    #[test]
917    fn test_no_field_upsert() {
918        let config = serde_json::from_str::<PiiConfig>(
919            r#"
920            {
921                "applications": {
922                    "**": ["@anything:remove"]
923                }
924            }
925            "#,
926        )
927        .unwrap();
928
929        let mut event = Annotated::new(Event {
930            extra: {
931                let mut map = Object::new();
932                map.insert(
933                    "myvalue".to_owned(),
934                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
935                );
936                Annotated::new(map)
937            },
938            ..Default::default()
939        });
940
941        let mut processor = PiiProcessor::new(config.compiled());
942        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
943        assert_annotated_snapshot!(event);
944    }
945
946    #[test]
947    fn test_anything_hash_on_string() {
948        let config = serde_json::from_str::<PiiConfig>(
949            r#"
950            {
951                "applications": {
952                    "$string": ["@anything:hash"]
953                }
954            }
955            "#,
956        )
957        .unwrap();
958
959        let mut event = Annotated::new(Event {
960            extra: {
961                let mut map = Object::new();
962                map.insert(
963                    "myvalue".to_owned(),
964                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
965                );
966                Annotated::new(map)
967            },
968            ..Default::default()
969        });
970
971        let mut processor = PiiProcessor::new(config.compiled());
972        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
973        assert_annotated_snapshot!(event);
974    }
975
976    #[test]
977    fn test_anything_hash_on_container() {
978        let config = serde_json::from_str::<PiiConfig>(
979            r#"
980            {
981                "applications": {
982                    "$object": ["@anything:hash"]
983                }
984            }
985            "#,
986        )
987        .unwrap();
988
989        let mut event = Annotated::new(Event {
990            extra: {
991                let mut map = Object::new();
992                map.insert(
993                    "myvalue".to_owned(),
994                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
995                );
996                Annotated::new(map)
997            },
998            ..Default::default()
999        });
1000
1001        let mut processor = PiiProcessor::new(config.compiled());
1002        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1003        assert_annotated_snapshot!(event);
1004    }
1005
1006    #[test]
1007    fn test_only_match_token_on_keys() {
1008        let mut data = Event::from_value(
1009            json!({
1010                "request": {
1011                    "headers": [
1012                        ["X-Token", "oof this is very sensitive"],
1013                        ["Token", "also bad"],
1014                    ]
1015                },
1016                "extra": {
1017                    "url": "foo.bar/endpoint?token=sensitive",
1018                    "url2": "foo.bar/endpoint?token_foobar=sensitive",
1019                    "aaa": "token:12345",
1020                    "foo-token-bar": "sensitive",
1021                    "llm": "token count",
1022                },
1023            })
1024            .into(),
1025        );
1026
1027        let scrubbing_config = DataScrubbingConfig {
1028            scrub_data: true,
1029            scrub_ip_addresses: true,
1030            scrub_defaults: true,
1031            ..Default::default()
1032        };
1033
1034        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1035        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1036
1037        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1038
1039        assert_annotated_snapshot!(&data);
1040    }
1041
1042    #[test]
1043    fn test_ignore_user_agent_ip_scrubbing() {
1044        let mut data = Event::from_value(
1045            json!({
1046                "request": {
1047                    "headers": [
1048                        ["User-Agent", "127.0.0.1"],
1049                        ["X-Client-Ip", "10.0.0.1"]
1050                    ]
1051                },
1052            })
1053            .into(),
1054        );
1055
1056        let scrubbing_config = DataScrubbingConfig {
1057            scrub_data: true,
1058            scrub_ip_addresses: true,
1059            scrub_defaults: true,
1060            ..Default::default()
1061        };
1062
1063        let pii_config = to_pii_config(&scrubbing_config).unwrap();
1064        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1065
1066        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1067
1068        assert_annotated_snapshot!(&data);
1069    }
1070
1071    #[test]
1072    fn test_remove_debugmeta_path() {
1073        let config = serde_json::from_str::<PiiConfig>(
1074            r#"
1075            {
1076                "applications": {
1077                    "debug_meta.images.*.code_file": ["@anything:remove"],
1078                    "debug_meta.images.*.debug_file": ["@anything:remove"]
1079                }
1080            }
1081            "#,
1082        )
1083        .unwrap();
1084
1085        let mut event = Annotated::new(Event {
1086            debug_meta: Annotated::new(DebugMeta {
1087                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1088                    NativeDebugImage {
1089                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1090                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1091                        debug_id: Annotated::new(
1092                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1093                        ),
1094                        debug_file: Annotated::new("wntdll.pdb".into()),
1095                        debug_checksum: Annotated::empty(),
1096                        arch: Annotated::new("arm64".to_owned()),
1097                        image_addr: Annotated::new(Addr(0)),
1098                        image_size: Annotated::new(4096),
1099                        image_vmaddr: Annotated::new(Addr(32768)),
1100                        other: {
1101                            let mut map = Object::new();
1102                            map.insert(
1103                                "other".to_owned(),
1104                                Annotated::new(Value::String("value".to_owned())),
1105                            );
1106                            map
1107                        },
1108                    },
1109                )))]),
1110                ..Default::default()
1111            }),
1112            ..Default::default()
1113        });
1114
1115        let mut processor = PiiProcessor::new(config.compiled());
1116        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1117        assert_annotated_snapshot!(event);
1118    }
1119
1120    #[test]
1121    fn test_replace_debugmeta_path() {
1122        let config = serde_json::from_str::<PiiConfig>(
1123            r#"
1124            {
1125                "applications": {
1126                    "debug_meta.images.*.code_file": ["@anything:replace"],
1127                    "debug_meta.images.*.debug_file": ["@anything:replace"]
1128                }
1129            }
1130            "#,
1131        )
1132        .unwrap();
1133
1134        let mut event = Annotated::new(Event {
1135            debug_meta: Annotated::new(DebugMeta {
1136                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1137                    NativeDebugImage {
1138                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1139                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1140                        debug_id: Annotated::new(
1141                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1142                        ),
1143                        debug_file: Annotated::new("wntdll.pdb".into()),
1144                        debug_checksum: Annotated::empty(),
1145                        arch: Annotated::new("arm64".to_owned()),
1146                        image_addr: Annotated::new(Addr(0)),
1147                        image_size: Annotated::new(4096),
1148                        image_vmaddr: Annotated::new(Addr(32768)),
1149                        other: {
1150                            let mut map = Object::new();
1151                            map.insert(
1152                                "other".to_owned(),
1153                                Annotated::new(Value::String("value".to_owned())),
1154                            );
1155                            map
1156                        },
1157                    },
1158                )))]),
1159                ..Default::default()
1160            }),
1161            ..Default::default()
1162        });
1163
1164        let mut processor = PiiProcessor::new(config.compiled());
1165        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1166        assert_annotated_snapshot!(event);
1167    }
1168
1169    #[test]
1170    fn test_hash_debugmeta_path() {
1171        let config = serde_json::from_str::<PiiConfig>(
1172            r#"
1173            {
1174                "applications": {
1175                    "debug_meta.images.*.code_file": ["@anything:hash"],
1176                    "debug_meta.images.*.debug_file": ["@anything:hash"]
1177                }
1178            }
1179            "#,
1180        )
1181        .unwrap();
1182
1183        let mut event = Annotated::new(Event {
1184            debug_meta: Annotated::new(DebugMeta {
1185                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1186                    NativeDebugImage {
1187                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1188                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1189                        debug_id: Annotated::new(
1190                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1191                        ),
1192                        debug_file: Annotated::new("wntdll.pdb".into()),
1193                        debug_checksum: Annotated::empty(),
1194                        arch: Annotated::new("arm64".to_owned()),
1195                        image_addr: Annotated::new(Addr(0)),
1196                        image_size: Annotated::new(4096),
1197                        image_vmaddr: Annotated::new(Addr(32768)),
1198                        other: {
1199                            let mut map = Object::new();
1200                            map.insert(
1201                                "other".to_owned(),
1202                                Annotated::new(Value::String("value".to_owned())),
1203                            );
1204                            map
1205                        },
1206                    },
1207                )))]),
1208                ..Default::default()
1209            }),
1210            ..Default::default()
1211        });
1212
1213        let mut processor = PiiProcessor::new(config.compiled());
1214        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1215        assert_annotated_snapshot!(event);
1216    }
1217
1218    #[test]
1219    fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1220        let config = serde_json::from_str::<PiiConfig>(
1221            r#"
1222            {
1223                "applications": {
1224                    "$string": ["@anything:remove"],
1225                    "**": ["@anything:remove"],
1226                    "debug_meta.**": ["@anything:remove"],
1227                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1228                }
1229            }
1230            "#,
1231        )
1232        .unwrap();
1233
1234        let mut event = Annotated::new(Event {
1235            debug_meta: Annotated::new(DebugMeta {
1236                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1237                    NativeDebugImage {
1238                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1239                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1240                        debug_id: Annotated::new(
1241                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1242                        ),
1243                        debug_file: Annotated::new("wntdll.pdb".into()),
1244                        debug_checksum: Annotated::empty(),
1245                        arch: Annotated::new("arm64".to_owned()),
1246                        image_addr: Annotated::new(Addr(0)),
1247                        image_size: Annotated::new(4096),
1248                        image_vmaddr: Annotated::new(Addr(32768)),
1249                        other: {
1250                            let mut map = Object::new();
1251                            map.insert(
1252                                "other".to_owned(),
1253                                Annotated::new(Value::String("value".to_owned())),
1254                            );
1255                            map
1256                        },
1257                    },
1258                )))]),
1259                ..Default::default()
1260            }),
1261            ..Default::default()
1262        });
1263
1264        let mut processor = PiiProcessor::new(config.compiled());
1265        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1266        assert_annotated_snapshot!(event);
1267    }
1268
1269    #[test]
1270    fn test_quoted_keys() {
1271        let config = serde_json::from_str::<PiiConfig>(
1272            r#"
1273            {
1274                "applications": {
1275                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1276                }
1277            }
1278            "#,
1279        )
1280        .unwrap();
1281
1282        let mut event = Annotated::new(Event {
1283            extra: {
1284                let mut map = Object::new();
1285                map.insert(
1286                    "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1287                    Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1288                );
1289                map.insert(
1290                    "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1291                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1292                );
1293                Annotated::new(map)
1294            },
1295            ..Default::default()
1296        });
1297
1298        let mut processor = PiiProcessor::new(config.compiled());
1299        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1300        assert_annotated_snapshot!(event);
1301    }
1302
1303    #[test]
1304    fn test_logentry_value_types() {
1305        // Assert that logentry.formatted is addressable as $string, $message and $logentry.formatted.
1306        for formatted_selector in &[
1307            "$logentry.formatted",
1308            "$message",
1309            "$logentry.formatted && $message",
1310            "$string",
1311        ] {
1312            let config = serde_json::from_str::<PiiConfig>(&format!(
1313                r##"
1314                {{
1315                    "applications": {{
1316                        "{formatted_selector}": ["@anything:remove"]
1317                    }}
1318                }}
1319                "##
1320            ))
1321            .unwrap();
1322
1323            let mut event = Annotated::new(Event {
1324                logentry: Annotated::new(LogEntry {
1325                    formatted: Annotated::new("Hello world!".to_owned().into()),
1326                    ..Default::default()
1327                }),
1328                ..Default::default()
1329            });
1330
1331            let mut processor = PiiProcessor::new(config.compiled());
1332            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1333            assert!(
1334                event
1335                    .value()
1336                    .unwrap()
1337                    .logentry
1338                    .value()
1339                    .unwrap()
1340                    .formatted
1341                    .value()
1342                    .is_none()
1343            );
1344        }
1345    }
1346
1347    #[test]
1348    fn test_logentry_formatted_never_fully_filtered() {
1349        // Test that logentry.formatted gets smart PII scrubbing via to_pii_config
1350        // and is never completely filtered even with aggressive PII rules
1351        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1352            scrub_data: true,
1353            scrub_defaults: true,
1354            scrub_ip_addresses: true,
1355            ..Default::default()
1356        })
1357        .unwrap();
1358
1359        let mut event = Annotated::new(Event {
1360            logentry: Annotated::new(LogEntry {
1361                formatted: Annotated::new(
1362                    "User john.doe@company.com failed login with card 4111-1111-1111-1111"
1363                        .to_owned()
1364                        .into(),
1365                ),
1366                ..Default::default()
1367            }),
1368            ..Default::default()
1369        });
1370
1371        let mut processor = PiiProcessor::new(config.compiled());
1372        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1373        assert_annotated_snapshot!(event, @r#"
1374        {
1375          "logentry": {
1376            "formatted": "User [email] failed login with card [creditcard]"
1377          },
1378          "_meta": {
1379            "logentry": {
1380              "formatted": {
1381                "": {
1382                  "rem": [
1383                    [
1384                      "@email:replace",
1385                      "s",
1386                      5,
1387                      12
1388                    ],
1389                    [
1390                      "@creditcard:replace",
1391                      "s",
1392                      36,
1393                      48
1394                    ]
1395                  ],
1396                  "len": 68
1397                }
1398              }
1399            }
1400          }
1401        }
1402        "#);
1403    }
1404
1405    #[test]
1406    fn test_logentry_formatted_bearer_token_scrubbing() {
1407        // Test that bearer tokens are properly scrubbed in logentry.formatted
1408        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1409            scrub_data: true,
1410            scrub_defaults: true,
1411            ..Default::default()
1412        })
1413        .unwrap();
1414
1415        let mut event = Annotated::new(Event {
1416            logentry: Annotated::new(LogEntry {
1417                formatted: Annotated::new(
1418                    "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1419                        .to_owned()
1420                        .into(),
1421                ),
1422                ..Default::default()
1423            }),
1424            ..Default::default()
1425        });
1426
1427        let mut processor = PiiProcessor::new(config.compiled());
1428        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1429        assert_annotated_snapshot!(event, @r#"
1430        {
1431          "logentry": {
1432            "formatted": "API request failed with Bearer [token] and other data"
1433          },
1434          "_meta": {
1435            "logentry": {
1436              "formatted": {
1437                "": {
1438                  "rem": [
1439                    [
1440                      "@bearer:replace",
1441                      "s",
1442                      24,
1443                      38
1444                    ]
1445                  ],
1446                  "len": 63
1447                }
1448              }
1449            }
1450          }
1451        }
1452        "#);
1453    }
1454
1455    #[test]
1456    fn test_logentry_formatted_password_word_not_scrubbed() {
1457        let config = PiiConfig::default();
1458        let mut event = Annotated::new(Event {
1459            logentry: Annotated::new(LogEntry {
1460                formatted: Annotated::new(
1461                    "User password is secret123 for authentication"
1462                        .to_owned()
1463                        .into(),
1464                ),
1465                ..Default::default()
1466            }),
1467            ..Default::default()
1468        });
1469
1470        let mut processor = PiiProcessor::new(config.compiled());
1471        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1472        assert_annotated_snapshot!(event, @r#"
1473        {
1474          "logentry": {
1475            "formatted": "User password is secret123 for authentication"
1476          }
1477        }
1478        "#);
1479    }
1480
1481    #[test]
1482    fn test_ip_address_hashing() {
1483        let config = serde_json::from_str::<PiiConfig>(
1484            r#"
1485            {
1486                "applications": {
1487                    "$user.ip_address": ["@ip:hash"]
1488                }
1489            }
1490            "#,
1491        )
1492        .unwrap();
1493
1494        let mut event = Annotated::new(Event {
1495            user: Annotated::new(User {
1496                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1497                ..Default::default()
1498            }),
1499            ..Default::default()
1500        });
1501
1502        let mut processor = PiiProcessor::new(config.compiled());
1503        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1504
1505        let user = event.value().unwrap().user.value().unwrap();
1506
1507        assert!(user.ip_address.value().is_none());
1508
1509        assert_eq!(
1510            user.id.value().unwrap().as_str(),
1511            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1512        );
1513    }
1514
1515    #[test]
1516    fn test_ip_address_hashing_does_not_overwrite_id() {
1517        let config = serde_json::from_str::<PiiConfig>(
1518            r#"
1519            {
1520                "applications": {
1521                    "$user.ip_address": ["@ip:hash"]
1522                }
1523            }
1524            "#,
1525        )
1526        .unwrap();
1527
1528        let mut event = Annotated::new(Event {
1529            user: Annotated::new(User {
1530                id: Annotated::new("123".to_owned().into()),
1531                ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1532                ..Default::default()
1533            }),
1534            ..Default::default()
1535        });
1536
1537        let mut processor = PiiProcessor::new(config.compiled());
1538        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1539
1540        let user = event.value().unwrap().user.value().unwrap();
1541
1542        // This will get wiped out in renormalization though
1543        assert_eq!(
1544            user.ip_address.value().unwrap().as_str(),
1545            "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1546        );
1547
1548        assert_eq!(user.id.value().unwrap().as_str(), "123");
1549    }
1550
1551    #[test]
1552    fn test_replace_replaced_text() {
1553        let chunks = vec![Chunk::Redaction {
1554            text: "[ip]".into(),
1555            rule_id: "@ip".into(),
1556            ty: RemarkType::Substituted,
1557        }];
1558        let rule = RuleRef {
1559            id: "@ip:replace".into(),
1560            origin: "@ip".into(),
1561            ty: RuleType::Ip,
1562            redaction: Redaction::Replace(ReplaceRedaction {
1563                text: "[ip]".into(),
1564            }),
1565        };
1566        let res = apply_regex_to_chunks(
1567            chunks.clone(),
1568            &rule,
1569            &Regex::new(r#".*"#).unwrap(),
1570            ReplaceBehavior::Value,
1571        );
1572        assert_eq!(chunks, res);
1573    }
1574
1575    #[test]
1576    fn test_replace_replaced_text_anything() {
1577        let chunks = vec![Chunk::Redaction {
1578            text: "[Filtered]".into(),
1579            rule_id: "@password:filter".into(),
1580            ty: RemarkType::Substituted,
1581        }];
1582        let rule = RuleRef {
1583            id: "@anything:filter".into(),
1584            origin: "@anything:filter".into(),
1585            ty: RuleType::Anything,
1586            redaction: Redaction::Replace(ReplaceRedaction {
1587                text: "[Filtered]".into(),
1588            }),
1589        };
1590        let res = apply_regex_to_chunks(
1591            chunks.clone(),
1592            &rule,
1593            &Regex::new(r#".*"#).unwrap(),
1594            ReplaceBehavior::Groups(smallvec::smallvec![0]),
1595        );
1596        assert_eq!(chunks, res);
1597    }
1598
1599    #[test]
1600    fn test_trace_route_params_scrubbed() {
1601        let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1602            r#"
1603            {
1604                "type": "trace",
1605                "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1606                "span_id": "fa90fdead5f74052",
1607                "data": {
1608                    "previousRoute": {
1609                        "params": {
1610                            "password": "test"
1611                        }
1612                    }
1613                }
1614            }
1615            "#,
1616        )
1617        .unwrap();
1618
1619        let ds_config = DataScrubbingConfig {
1620            scrub_data: true,
1621            scrub_defaults: true,
1622            ..Default::default()
1623        };
1624        let pii_config = ds_config.pii_config().as_ref().unwrap();
1625        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1626
1627        process_value(
1628            &mut trace_context,
1629            &mut pii_processor,
1630            ProcessingState::root(),
1631        )
1632        .unwrap();
1633        assert_annotated_snapshot!(trace_context);
1634    }
1635
1636    #[test]
1637    fn test_scrub_span_data_http_not_scrubbed() {
1638        let mut span: Annotated<Span> = Annotated::from_json(
1639            r#"{
1640                "data": {
1641                    "http": {
1642                        "query": "dance=true"
1643                    }
1644                }
1645            }"#,
1646        )
1647        .unwrap();
1648
1649        let ds_config = DataScrubbingConfig {
1650            scrub_data: true,
1651            scrub_defaults: true,
1652            ..Default::default()
1653        };
1654        let pii_config = ds_config.pii_config().as_ref().unwrap();
1655        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1656
1657        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1658        assert_annotated_snapshot!(span);
1659    }
1660
1661    #[test]
1662    fn test_scrub_span_data_http_strings_are_scrubbed() {
1663        let mut span: Annotated<Span> = Annotated::from_json(
1664            r#"{
1665                "data": {
1666                    "http": {
1667                        "query": "ccnumber=5105105105105100&process_id=123",
1668                        "fragment": "ccnumber=5105105105105100,process_id=123"
1669                    }
1670                }
1671            }"#,
1672        )
1673        .unwrap();
1674
1675        let ds_config = DataScrubbingConfig {
1676            scrub_data: true,
1677            scrub_defaults: true,
1678            ..Default::default()
1679        };
1680        let pii_config = ds_config.pii_config().as_ref().unwrap();
1681        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1682
1683        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1684        assert_annotated_snapshot!(span);
1685    }
1686
1687    #[test]
1688    fn test_scrub_span_data_http_objects_are_scrubbed() {
1689        let mut span: Annotated<Span> = Annotated::from_json(
1690            r#"{
1691                "data": {
1692                    "http": {
1693                        "query": {
1694                            "ccnumber": "5105105105105100",
1695                            "process_id": "123"
1696                        },
1697                        "fragment": {
1698                            "ccnumber": "5105105105105100",
1699                            "process_id": "123"
1700                        }
1701                    }
1702                }
1703            }"#,
1704        )
1705        .unwrap();
1706
1707        let ds_config = DataScrubbingConfig {
1708            scrub_data: true,
1709            scrub_defaults: true,
1710            ..Default::default()
1711        };
1712        let pii_config = ds_config.pii_config().as_ref().unwrap();
1713        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1714
1715        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1716        assert_annotated_snapshot!(span);
1717    }
1718
1719    #[test]
1720    fn test_scrub_span_data_untyped_props_are_scrubbed() {
1721        let mut span: Annotated<Span> = Annotated::from_json(
1722            r#"{
1723                "data": {
1724                    "untyped": "ccnumber=5105105105105100",
1725                    "more_untyped": {
1726                        "typed": "no",
1727                        "scrubbed": "yes",
1728                        "ccnumber": "5105105105105100"
1729                    }
1730                }
1731            }"#,
1732        )
1733        .unwrap();
1734
1735        let ds_config = DataScrubbingConfig {
1736            scrub_data: true,
1737            scrub_defaults: true,
1738            ..Default::default()
1739        };
1740        let pii_config = ds_config.pii_config().as_ref().unwrap();
1741        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1742
1743        process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1744        assert_annotated_snapshot!(span);
1745    }
1746
1747    #[test]
1748    fn test_span_data_pii() {
1749        let mut span = Span::from_value(
1750            json!({
1751                "data": {
1752                    "code.filepath": "src/sentry/api/authentication.py",
1753                }
1754            })
1755            .into(),
1756        );
1757
1758        let ds_config = DataScrubbingConfig {
1759            scrub_data: true,
1760            scrub_defaults: true,
1761            ..Default::default()
1762        };
1763        let pii_config = ds_config.pii_config().as_ref().unwrap();
1764
1765        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1766        processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1767        assert_eq!(
1768            get_value!(span.data.code_filepath!).as_str(),
1769            Some("src/sentry/api/authentication.py")
1770        );
1771    }
1772
1773    #[test]
1774    fn test_csp_source_file_pii() {
1775        let mut event = Event::from_value(
1776            json!({
1777                "csp": {
1778                    "source_file": "authentication.js",
1779                }
1780            })
1781            .into(),
1782        );
1783
1784        let config = serde_json::from_str::<PiiConfig>(
1785            r#"
1786            {
1787                "applications": {
1788                    "csp.source_file": ["@anything:filter"]
1789                }
1790            }
1791            "#,
1792        )
1793        .unwrap();
1794
1795        let mut pii_processor = PiiProcessor::new(config.compiled());
1796        processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1797        assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1798    }
1799
1800    #[test]
1801    fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1802        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1803            r#"{
1804                "data": {
1805                    "http": {
1806                        "query": "dance=true"
1807                    }
1808                }
1809            }"#,
1810        )
1811        .unwrap();
1812
1813        let ds_config = DataScrubbingConfig {
1814            scrub_data: true,
1815            scrub_defaults: true,
1816            ..Default::default()
1817        };
1818        let pii_config = ds_config.pii_config().as_ref().unwrap();
1819        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1820        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1821        assert_annotated_snapshot!(breadcrumb);
1822    }
1823
1824    #[test]
1825    fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1826        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1827            r#"{
1828                "data": {
1829                    "http": {
1830                        "query": "ccnumber=5105105105105100&process_id=123",
1831                        "fragment": "ccnumber=5105105105105100,process_id=123"
1832                    }
1833                }
1834            }"#,
1835        )
1836        .unwrap();
1837
1838        let ds_config = DataScrubbingConfig {
1839            scrub_data: true,
1840            scrub_defaults: true,
1841            ..Default::default()
1842        };
1843        let pii_config = ds_config.pii_config().as_ref().unwrap();
1844        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1845        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1846        assert_annotated_snapshot!(breadcrumb);
1847    }
1848
1849    #[test]
1850    fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1851        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1852            r#"{
1853                "data": {
1854                    "http": {
1855                        "query": {
1856                            "ccnumber": "5105105105105100",
1857                            "process_id": "123"
1858                        },
1859                        "fragment": {
1860                            "ccnumber": "5105105105105100",
1861                            "process_id": "123"
1862                        }
1863                    }
1864                }
1865            }"#,
1866        )
1867        .unwrap();
1868
1869        let ds_config = DataScrubbingConfig {
1870            scrub_data: true,
1871            scrub_defaults: true,
1872            ..Default::default()
1873        };
1874        let pii_config = ds_config.pii_config().as_ref().unwrap();
1875        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1876
1877        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1878        assert_annotated_snapshot!(breadcrumb);
1879    }
1880
1881    #[test]
1882    fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1883        let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1884            r#"{
1885                "data": {
1886                    "untyped": "ccnumber=5105105105105100",
1887                    "more_untyped": {
1888                        "typed": "no",
1889                        "scrubbed": "yes",
1890                        "ccnumber": "5105105105105100"
1891                    }
1892                }
1893            }"#,
1894        )
1895        .unwrap();
1896
1897        let ds_config = DataScrubbingConfig {
1898            scrub_data: true,
1899            scrub_defaults: true,
1900            ..Default::default()
1901        };
1902        let pii_config = ds_config.pii_config().as_ref().unwrap();
1903        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1904        process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1905        assert_annotated_snapshot!(breadcrumb);
1906    }
1907
1908    #[test]
1909    fn test_scrub_graphql_response_data_with_variables() {
1910        let mut data = Event::from_value(
1911            json!({
1912              "request": {
1913                "data": {
1914                  "query": "{\n  viewer {\n    login\n  }\n}",
1915                  "variables": {
1916                    "login": "foo"
1917                  }
1918                },
1919                "api_target": "graphql"
1920              },
1921              "contexts": {
1922                "response": {
1923                  "type": "response",
1924                  "data": {
1925                    "data": {
1926                      "viewer": {
1927                        "login": "foo"
1928                      }
1929                    }
1930                  }
1931                }
1932              }
1933            })
1934            .into(),
1935        );
1936
1937        scrub_graphql(data.value_mut().as_mut().unwrap());
1938
1939        assert_debug_snapshot!(&data);
1940    }
1941
1942    #[test]
1943    fn test_scrub_graphql_response_data_without_variables() {
1944        let mut data = Event::from_value(
1945            json!({
1946              "request": {
1947                "data": {
1948                  "query": "{\n  viewer {\n    login\n  }\n}"
1949                },
1950                "api_target": "graphql"
1951              },
1952              "contexts": {
1953                "response": {
1954                  "type": "response",
1955                  "data": {
1956                    "data": {
1957                      "viewer": {
1958                        "login": "foo"
1959                      }
1960                    }
1961                  }
1962                }
1963              }
1964            })
1965            .into(),
1966        );
1967
1968        scrub_graphql(data.value_mut().as_mut().unwrap());
1969        assert_debug_snapshot!(&data);
1970    }
1971
1972    #[test]
1973    fn test_does_not_scrub_if_no_graphql() {
1974        let mut data = Event::from_value(
1975            json!({
1976              "request": {
1977                "data": {
1978                  "query": "{\n  viewer {\n    login\n  }\n}",
1979                  "variables": {
1980                    "login": "foo"
1981                  }
1982                },
1983              },
1984              "contexts": {
1985                "response": {
1986                  "type": "response",
1987                  "data": {
1988                    "data": {
1989                      "viewer": {
1990                        "login": "foo"
1991                      }
1992                    }
1993                  }
1994                }
1995              }
1996            })
1997            .into(),
1998        );
1999
2000        let scrubbing_config = DataScrubbingConfig {
2001            scrub_data: true,
2002            scrub_ip_addresses: true,
2003            scrub_defaults: true,
2004            ..Default::default()
2005        };
2006
2007        let pii_config = to_pii_config(&scrubbing_config).unwrap();
2008        let mut pii_processor = PiiProcessor::new(pii_config.compiled());
2009
2010        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
2011
2012        assert_debug_snapshot!(&data);
2013    }
2014
2015    #[test]
2016    fn test_logentry_params_scrubbed() {
2017        let config = serde_json::from_str::<PiiConfig>(
2018            r##"
2019                {
2020                    "applications": {
2021                        "$string": ["@anything:remove"]
2022                    }
2023                }
2024                "##,
2025        )
2026        .unwrap();
2027
2028        let mut event = Annotated::new(Event {
2029            logentry: Annotated::new(LogEntry {
2030                message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
2031                formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
2032                params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
2033                    "12345".to_owned(),
2034                ))])),
2035                ..Default::default()
2036            }),
2037            ..Default::default()
2038        });
2039
2040        let mut processor = PiiProcessor::new(config.compiled());
2041        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2042
2043        let params = get_value!(event.logentry.params!);
2044        assert_debug_snapshot!(params, @r###"
2045        Array(
2046            [
2047                Meta {
2048                    remarks: [
2049                        Remark {
2050                            ty: Removed,
2051                            rule_id: "@anything:remove",
2052                            range: None,
2053                        },
2054                    ],
2055                    errors: [],
2056                    original_length: None,
2057                    original_value: None,
2058                },
2059            ],
2060        )
2061        "###);
2062    }
2063
2064    #[test]
2065    fn test_is_pairlist() {
2066        for (case, expected) in [
2067            (r#"[]"#, false),
2068            (r#"["foo"]"#, false),
2069            (r#"["foo", 123]"#, false),
2070            (r#"[[1, "foo"]]"#, false),
2071            (r#"[[["too_nested", 123]]]"#, false),
2072            (r#"[["foo", "bar"], [1, "foo"]]"#, false),
2073            (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
2074            (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
2075            (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
2076            (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
2077            (r#"[["foo", 123]]"#, true),
2078            (r#"[["foo", "bar"]]"#, true),
2079            (
2080                r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
2081                true,
2082            ),
2083        ] {
2084            let v = Annotated::<Value>::from_json(case).unwrap();
2085            let Annotated(Some(Value::Array(mut a)), _) = v else {
2086                panic!()
2087            };
2088            assert_eq!(is_pairlist(&mut a), expected, "{case}");
2089        }
2090    }
2091
2092    #[test]
2093    fn test_tuple_array_scrubbed_with_path_selector() {
2094        // We expect that both of these configs express the same semantics.
2095        let configs = vec![
2096            // This configuration matches on the authorization element (the 1st element of the array
2097            // represents the key).
2098            r##"
2099                {
2100                    "applications": {
2101                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
2102                    }
2103                }
2104                "##,
2105            // This configuration matches on the 2nd element of the array.
2106            r##"
2107                {
2108                    "applications": {
2109                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
2110                    }
2111                }
2112                "##,
2113        ];
2114
2115        let mut event = Event::from_value(
2116            serde_json::json!(
2117            {
2118              "message": "hi",
2119              "exception": {
2120                "values": [
2121                  {
2122                    "type": "BrokenException",
2123                    "value": "Something failed",
2124                    "stacktrace": {
2125                      "frames": [
2126                        {
2127                            "vars": {
2128                                "headers": [
2129                                    ["authorization", "Bearer abc123"]
2130                                ]
2131                            }
2132                        }
2133                      ]
2134                    }
2135                  }
2136                ]
2137              }
2138            })
2139            .into(),
2140        );
2141
2142        for config in configs {
2143            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
2144            let mut processor = PiiProcessor::new(config.compiled());
2145            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2146
2147            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2148
2149            allow_duplicates!(assert_debug_snapshot!(vars, @r###"
2150                              FrameVars(
2151                                  {
2152                                      "headers": Array(
2153                                          [
2154                                              Array(
2155                                                  [
2156                                                      String(
2157                                                          "authorization",
2158                                                      ),
2159                                                      Annotated(
2160                                                          String(
2161                                                              "[Filtered]",
2162                                                          ),
2163                                                          Meta {
2164                                                              remarks: [
2165                                                                  Remark {
2166                                                                      ty: Substituted,
2167                                                                      rule_id: "@anything:replace",
2168                                                                      range: Some(
2169                                                                          (
2170                                                                              0,
2171                                                                              10,
2172                                                                          ),
2173                                                                      ),
2174                                                                  },
2175                                                              ],
2176                                                              errors: [],
2177                                                              original_length: Some(
2178                                                                  13,
2179                                                              ),
2180                                                              original_value: None,
2181                                                          },
2182                                                      ),
2183                                                  ],
2184                                              ),
2185                                          ],
2186                                      ),
2187                                  },
2188                              )
2189                              "###));
2190        }
2191    }
2192
2193    #[test]
2194    fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2195        let config = serde_json::from_str::<PiiConfig>(
2196            r##"
2197                {
2198                    "applications": {
2199                        "$string": ["@password:remove"]
2200                    }
2201                }
2202                "##,
2203        )
2204        .unwrap();
2205
2206        let mut event = Event::from_value(
2207            serde_json::json!(
2208            {
2209              "message": "hi",
2210              "exception": {
2211                "values": [
2212                  {
2213                    "type": "BrokenException",
2214                    "value": "Something failed",
2215                    "stacktrace": {
2216                      "frames": [
2217                        {
2218                            "vars": {
2219                                "headers": [
2220                                    ["authorization", "abc123"]
2221                                ]
2222                            }
2223                        }
2224                      ]
2225                    }
2226                  }
2227                ]
2228              }
2229            })
2230            .into(),
2231        );
2232
2233        let mut processor = PiiProcessor::new(config.compiled());
2234        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2235
2236        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2237
2238        assert_debug_snapshot!(vars, @r###"
2239        FrameVars(
2240            {
2241                "headers": Array(
2242                    [
2243                        Array(
2244                            [
2245                                String(
2246                                    "authorization",
2247                                ),
2248                                Meta {
2249                                    remarks: [
2250                                        Remark {
2251                                            ty: Removed,
2252                                            rule_id: "@password:remove",
2253                                            range: None,
2254                                        },
2255                                    ],
2256                                    errors: [],
2257                                    original_length: None,
2258                                    original_value: None,
2259                                },
2260                            ],
2261                        ),
2262                    ],
2263                ),
2264            },
2265        )
2266        "###);
2267    }
2268}