1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8 self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9 ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12 AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Controls how a [`PiiProcessor`] traverses `Attributes` containers.
#[derive(Debug, Clone, Copy)]
pub enum AttributeMode {
    /// Process the attributes container like any other object by recursing
    /// into its child values.
    Object,
    /// Process only the inner value of each attribute, entered under the
    /// attribute's key, plus the attribute's `other` fields.
    ValueOnly,
}
32
/// A processor that applies the rules of a compiled PII config to the values it visits.
pub struct PiiProcessor<'a> {
    /// How `Attributes` containers are traversed; see [`AttributeMode`].
    attribute_mode: AttributeMode,
    /// The compiled config whose `applications` (selector / rules pairs) are
    /// consulted for every visited value.
    compiled_config: &'a CompiledPiiConfig,
}
39
40impl<'a> PiiProcessor<'a> {
41 pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43 PiiProcessor {
46 compiled_config,
47 attribute_mode: AttributeMode::Object,
48 }
49 }
50
51 pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53 self.attribute_mode = attribute_mode;
54 self
55 }
56
57 fn apply_all_rules(
58 &self,
59 meta: &mut Meta,
60 state: &ProcessingState<'_>,
61 mut value: Option<&mut String>,
62 ) -> ProcessingResult {
63 let pii = state.pii();
64 if pii == Pii::False {
65 return Ok(());
66 }
67
68 for (selector, rules) in self.compiled_config.applications.iter() {
69 if selector.matches_path(&state.path()) {
70 #[allow(clippy::needless_option_as_deref)]
71 for rule in rules {
72 let reborrowed_value = value.as_deref_mut();
73 apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74 }
75 }
76 }
77
78 Ok(())
79 }
80}
81
82impl Processor for PiiProcessor<'_> {
83 fn before_process<T: ProcessValue>(
84 &mut self,
85 value: Option<&T>,
86 meta: &mut Meta,
87 state: &ProcessingState<'_>,
88 ) -> ProcessingResult {
89 if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90 if let Some(parent) = state.iter().next() {
94 let path = state.path();
95 let new_state = parent.enter_borrowed(
96 path.key().unwrap_or(""),
97 Some(Cow::Borrowed(state.attrs())),
98 enum_set!(ValueType::String),
99 );
100
101 if self
102 .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103 .is_err()
104 {
105 meta.set_original_value(Option::<String>::None);
107 }
108 }
109 }
110
111 if state.value_type().contains(ValueType::Boolean)
113 || state.value_type().contains(ValueType::String)
114 {
115 return Ok(());
116 }
117
118 if value.is_none() {
119 return Ok(());
120 }
121
122 self.apply_all_rules(meta, state, None)
124 }
125
126 fn process_array<T>(
127 &mut self,
128 array: &mut Array<T>,
129 _meta: &mut Meta,
130 state: &ProcessingState<'_>,
131 ) -> ProcessingResult
132 where
133 T: ProcessValue,
134 {
135 if is_pairlist(array) {
136 for annotated in array {
137 let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139 if let Some(Value::Array(pair)) = mapped.value_mut() {
140 let mut value = mem::take(&mut pair[1]);
141 let value_type = ValueType::for_field(&value);
142
143 if let Some(key_name) = &pair[0].as_str() {
144 let key_state =
147 state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148 process_value(&mut value, self, &key_state)?;
151 }
152
153 pair[1] = value;
155 }
156
157 *annotated = T::from_value(mapped);
159 }
160
161 Ok(())
162 } else {
163 array.process_child_values(self, state)
165 }
166 }
167
168 fn process_string(
169 &mut self,
170 value: &mut String,
171 meta: &mut Meta,
172 state: &ProcessingState<'_>,
173 ) -> ProcessingResult {
174 if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175 return Ok(());
176 }
177
178 self.apply_all_rules(meta, state, Some(value))
181 }
182
183 fn process_native_image_path(
184 &mut self,
185 NativeImagePath(value): &mut NativeImagePath,
186 meta: &mut Meta,
187 state: &ProcessingState<'_>,
188 ) -> ProcessingResult {
189 if let Some(index) = value.rfind(['/', '\\']) {
200 let basename = value.split_off(index);
201 match self.process_string(value, meta, state) {
202 Ok(()) => value.push_str(&basename),
203 Err(
204 ProcessingAction::DeleteValueHard
205 | ProcessingAction::DeleteValueWithRemark(_)
206 | ProcessingAction::DeleteValueSoft,
207 ) => {
208 basename[1..].clone_into(value);
209 }
210 Err(ProcessingAction::InvalidTransaction(x)) => {
211 return Err(ProcessingAction::InvalidTransaction(x));
212 }
213 }
214 }
215
216 Ok(())
217 }
218
    /// Processes a typed pair list by delegating to `utils::process_pairlist`.
    ///
    /// NOTE(review): the helper presumably processes each value under its
    /// pair's key rather than its index, mirroring `process_array` — confirm
    /// in `crate::utils`.
    fn process_pairlist<T: ProcessValue + AsPair>(
        &mut self,
        value: &mut PairList<T>,
        _meta: &mut Meta,
        state: &ProcessingState,
    ) -> ProcessingResult {
        utils::process_pairlist(self, value, state)
    }
227
228 fn process_attributes(
229 &mut self,
230 value: &mut relay_event_schema::protocol::Attributes,
231 _meta: &mut Meta,
232 state: &ProcessingState,
233 ) -> ProcessingResult {
234 match self.attribute_mode {
235 AttributeMode::Object => value.process_child_values(self, state),
237 AttributeMode::ValueOnly => {
239 for (key, attribute) in value.0.iter_mut() {
240 let Some(attribute) = attribute.value_mut() else {
241 continue;
242 };
243
244 let attrs = FieldAttrs::new()
249 .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
250 let inner_value = &mut attribute.value.value;
251 let inner_value_type = ValueType::for_field(inner_value);
252 let entered =
253 state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
254
255 processor::process_value(inner_value, self, &entered)?;
256 self.process_other(&mut attribute.other, state)?;
257 }
258 Ok(())
259 }
260 }
261 }
262
263 fn process_user(
264 &mut self,
265 user: &mut User,
266 _meta: &mut Meta,
267 state: &ProcessingState<'_>,
268 ) -> ProcessingResult {
269 let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
270
271 user.process_child_values(self, state)?;
273
274 let has_other_fields = user.id.value().is_some()
275 || user.username.value().is_some()
276 || user.email.value().is_some();
277
278 let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
279
280 if ip_was_valid && !has_other_fields && !ip_is_still_valid {
290 user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
291 user.ip_address.meta_mut().add_remark(Remark::new(
292 RemarkType::Removed,
293 "pii:ip_address".to_owned(),
294 ));
295 }
296
297 Ok(())
298 }
299
300 fn process_replay(
302 &mut self,
303 replay: &mut Replay,
304 _meta: &mut Meta,
305 state: &ProcessingState<'_>,
306 ) -> ProcessingResult {
307 replay.process_child_values(self, state)?;
308 Ok(())
309 }
310}
311
/// Visitor state used to detect whether a value is a `[key, value]` pair.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited value is a root-level array with exactly two elements.
    is_pair: bool,
    /// Set when the first element of that array is a string.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Returns `true` if the visited value was a two-element array whose
    /// first element is a string.
    fn is_pair_array(&self) -> bool {
        let Self {
            is_pair,
            has_string_key,
        } = *self;

        is_pair && has_string_key
    }
}
325
326impl Processor for PairListProcessor {
327 fn process_array<T>(
328 &mut self,
329 value: &mut Array<T>,
330 _meta: &mut Meta,
331 state: &ProcessingState<'_>,
332 ) -> ProcessingResult
333 where
334 T: ProcessValue,
335 {
336 self.is_pair = state.depth() == 0 && value.len() == 2;
337 if self.is_pair {
338 let key_type = ValueType::for_field(&value[0]);
339 process_value(
340 &mut value[0],
341 self,
342 &state.enter_index(0, state.inner_attrs(), key_type),
343 )?;
344 }
345
346 Ok(())
347 }
348
349 fn process_string(
350 &mut self,
351 _value: &mut String,
352 _meta: &mut Meta,
353 state: &ProcessingState<'_>,
354 ) -> ProcessingResult where {
355 if state.depth() == 1 && state.path().index() == Some(0) {
356 self.has_string_key = true;
357 }
358
359 Ok(())
360 }
361}
362
363fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
364 for element in array.iter_mut() {
365 let mut visitor = PairListProcessor::default();
366 process_value(element, &mut visitor, ProcessingState::root()).ok();
367 if !visitor.is_pair_array() {
368 return false;
369 }
370 }
371
372 !array.is_empty()
373}
374
375pub fn scrub_graphql(event: &mut Event) {
377 let mut keys: BTreeSet<&str> = BTreeSet::new();
378
379 let mut is_graphql = false;
380
381 if let Some(request) = event.request.value_mut()
383 && let Some(Value::Object(data)) = request.data.value_mut()
384 {
385 if let Some(api_target) = request.api_target.value()
386 && api_target.eq_ignore_ascii_case("graphql")
387 {
388 is_graphql = true;
389 }
390
391 if is_graphql
392 && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
393 {
394 for (key, value) in variables.iter_mut() {
395 keys.insert(key);
396 value.set_value(Some(Value::String("[Filtered]".to_owned())));
397 }
398 }
399 }
400
401 if !is_graphql {
402 return;
403 }
404
405 if let Some(contexts) = event.contexts.value_mut()
407 && let Some(response) = contexts.get_mut::<ResponseContext>()
408 && let Some(Value::Object(data)) = response.data.value_mut()
409 && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
410 {
411 if !keys.is_empty() {
412 scrub_graphql_data(&keys, graphql_data);
413 } else {
414 data.remove("data");
417 }
418 }
419}
420
421fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
423 for (key, value) in data.iter_mut() {
424 match value.value_mut() {
425 Some(Value::Object(item_data)) => {
426 scrub_graphql_data(keys, item_data);
427 }
428 _ => {
429 if keys.contains(key.as_str()) {
430 value.set_value(Some(Value::String("[Filtered]".to_owned())));
431 }
432 }
433 }
434 }
435}
436
/// Applies a single rule to a value, either redacting parts of a string or
/// requesting deletion of the value.
///
/// `key` is the key of the value in its parent, if any. `value` is the string
/// to redact; it is `None` when the caller has no string to offer (see
/// `PiiProcessor::before_process`), in which case the only possible action is
/// deletion.
///
/// Returns `Err(ProcessingAction::DeleteValueHard)`, after adding a "removed"
/// remark with the rule's origin to `meta`, when the value must be deleted.
/// Otherwise returns `Ok(())`, possibly after rewriting `value` in place.
fn apply_rule_to_value(
    meta: &mut Meta,
    rule: &RuleRef,
    key: Option<&str>,
    mut value: Option<&mut String>,
) -> ProcessingResult {
    // `Default` and `Remove` redactions delete; every other redaction method
    // rewrites the string chunk by chunk.
    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);

    // An "anything" rule on a non-string value, or with a removing redaction,
    // can only delete the value.
    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
        return Err(ProcessingAction::DeleteValueHard);
    }

    // Runs `$regex` over the chunks of the string value, if there is one.
    // This is a macro rather than a closure so that `value` and `meta` are
    // only borrowed at each expansion site.
    macro_rules! apply_regex {
        ($regex:expr, $replace_behavior:expr) => {
            if let Some(ref mut value) = value {
                processor::process_chunked_value(value, meta, |chunks| {
                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
                });
            }
        };
    }

    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
        if matches!(pattern_type, PatternType::Key | PatternType::KeyValue)
            && key.is_some_and(|key| regex.is_match(key))
        {
            // The key matched: the entire value is sensitive.
            if value.is_some() && should_redact_chunks {
                // Redact the whole string the way an "anything" rule would.
                apply_regex!(&ANYTHING_REGEX, replace_behavior);
            } else {
                meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
                return Err(ProcessingAction::DeleteValueHard);
            }
        } else if matches!(pattern_type, PatternType::Value | PatternType::KeyValue) {
            // The key did not match (or is not considered): match on the value.
            apply_regex!(regex, replace_behavior);
        }
    }

    Ok(())
}
484
485fn apply_regex_to_chunks<'a>(
486 chunks: Vec<Chunk<'a>>,
487 rule: &RuleRef,
488 regex: &Regex,
489 replace_behavior: ReplaceBehavior,
490) -> Vec<Chunk<'a>> {
491 let mut search_string = String::new();
496 let mut has_text = false;
497 for chunk in &chunks {
498 match chunk {
499 Chunk::Text { text } => {
500 has_text = true;
501 search_string.push_str(&text.replace('\x00', ""));
502 }
503 Chunk::Redaction { .. } => search_string.push('\x00'),
504 }
505 }
506
507 if !has_text {
508 return chunks;
510 }
511
512 let mut captures_iter = regex.captures_iter(&search_string).peekable();
514 if captures_iter.peek().is_none() {
515 return chunks;
516 }
517
518 let mut replacement_chunks = vec![];
519 for chunk in chunks {
520 if let Chunk::Redaction { .. } = chunk {
521 replacement_chunks.push(chunk);
522 }
523 }
524 replacement_chunks.reverse();
525
526 fn process_text<'a>(
527 text: &str,
528 rv: &mut Vec<Chunk<'a>>,
529 replacement_chunks: &mut Vec<Chunk<'a>>,
530 ) {
531 if text.is_empty() {
532 return;
533 }
534
535 static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
539 let regex = NULL_SPLIT_RE.get_or_init(|| {
540 #[allow(clippy::trivial_regex)]
541 Regex::new("\x00").unwrap()
542 });
543
544 let mut pos = 0;
545 for piece in regex.find_iter(text) {
546 rv.push(Chunk::Text {
547 text: Cow::Owned(text[pos..piece.start()].to_string()),
548 });
549 rv.push(replacement_chunks.pop().unwrap());
550 pos = piece.end();
551 }
552
553 rv.push(Chunk::Text {
554 text: Cow::Owned(text[pos..].to_string()),
555 });
556 }
557
558 let mut pos = 0;
559 let mut rv = Vec::with_capacity(replacement_chunks.len());
560
561 match replace_behavior {
562 ReplaceBehavior::Groups(ref groups) => {
563 for m in captures_iter {
564 for (idx, g) in m.iter().enumerate() {
565 if let Some(g) = g
566 && groups.contains(&(idx as u8))
567 {
568 process_text(
569 &search_string[pos..g.start()],
570 &mut rv,
571 &mut replacement_chunks,
572 );
573 insert_replacement_chunks(rule, g.as_str(), &mut rv);
574 pos = g.end();
575 }
576 }
577 }
578 process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
579 debug_assert!(replacement_chunks.is_empty());
580 }
581 ReplaceBehavior::Value => {
582 insert_replacement_chunks(rule, &search_string, &mut rv);
586 }
587 }
588 rv
589}
590
591fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
592 match &rule.redaction {
593 Redaction::Default | Redaction::Remove => {
594 output.push(Chunk::Redaction {
595 text: Cow::Borrowed(""),
596 rule_id: Cow::Owned(rule.origin.to_string()),
597 ty: RemarkType::Removed,
598 });
599 }
600 Redaction::Mask => {
601 let buf = vec!['*'; text.chars().count()];
602
603 output.push(Chunk::Redaction {
604 ty: RemarkType::Masked,
605 rule_id: Cow::Owned(rule.origin.to_string()),
606 text: buf.into_iter().collect(),
607 })
608 }
609 Redaction::Hash => {
610 output.push(Chunk::Redaction {
611 ty: RemarkType::Pseudonymized,
612 rule_id: Cow::Owned(rule.origin.to_string()),
613 text: Cow::Owned(utils::hash_value(text.as_bytes())),
614 });
615 }
616 Redaction::Replace(replace) => {
617 output.push(Chunk::Redaction {
618 ty: RemarkType::Substituted,
619 rule_id: Cow::Owned(rule.origin.to_string()),
620 text: Cow::Owned(replace.text.clone()),
621 });
622 }
623 Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
624 }
625}
626
627#[cfg(test)]
628mod tests {
629 use insta::{allow_duplicates, assert_debug_snapshot};
630 use relay_event_schema::processor::process_value;
631 use relay_event_schema::protocol::{
632 Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
633 NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
634 };
635 use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
636 use serde_json::json;
637
638 use super::*;
639 use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
640
    /// Test helper: converts a `DataScrubbingConfig` with
    /// `crate::convert::to_pii_config` and, if a config was produced, asserts
    /// that it survives a `serde_json` round trip unchanged.
    fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
        use crate::convert::to_pii_config as to_pii_config_impl;
        let rv = to_pii_config_impl(datascrubbing_config);
        if let Some(ref config) = rv {
            // Serialize to a JSON value and back; the result must be equal.
            let roundtrip: PiiConfig =
                serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
            assert_eq!(&roundtrip, config);
        }
        rv
    }
651
    /// Runs default data scrubbing (including IP scrubbing) over an event
    /// whose `user` and `hpkp` fields contain IP-address and credit-card-like
    /// text, and compares the result with the stored debug snapshot.
    ///
    /// NOTE(review): going by the test name, these inputs presumably end up
    /// as original values in meta during `from_value` — confirm against the
    /// snapshot.
    #[test]
    fn test_scrub_original_value() {
        let mut data = Event::from_value(
            json!({
                "user": {
                    "username": "hey man 73.133.27.120",
                    "ip_address": "is this an ip address? 73.133.27.120",
                },
                "hpkp":"invalid data my ip address is 74.133.27.120 and my credit card number is 4571234567890111 ",
            })
            .into(),
        );

        let scrubbing_config = DataScrubbingConfig {
            scrub_data: true,
            scrub_ip_addresses: true,
            scrub_defaults: true,
            ..Default::default()
        };

        let pii_config = to_pii_config(&scrubbing_config).unwrap();
        let mut pii_processor = PiiProcessor::new(pii_config.compiled());

        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();

        assert_debug_snapshot!(&data);
    }
679
    /// Runs default data scrubbing (including IP scrubbing) over a user with
    /// an `ip_address` and a `sentry_user` of the form `ip:<address>`, and
    /// compares the result with the stored debug snapshot.
    #[test]
    fn test_sentry_user() {
        let mut data = Event::from_value(
            json!({
                "user": {
                    "ip_address": "73.133.27.120",
                    "sentry_user": "ip:73.133.27.120",
                },
            })
            .into(),
        );

        let scrubbing_config = DataScrubbingConfig {
            scrub_data: true,
            scrub_ip_addresses: true,
            scrub_defaults: true,
            ..Default::default()
        };

        let pii_config = to_pii_config(&scrubbing_config).unwrap();
        let mut pii_processor = PiiProcessor::new(pii_config.compiled());

        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();

        assert_debug_snapshot!(&data);
    }
706
    /// Applies `@ip` to all strings and a custom `redact_pair` rule (key
    /// pattern `cookie|secret[-_]?key`, case-insensitive) to everything below
    /// objects, using an event with a log entry, a request env entry, request
    /// headers and a tag. The result is compared with the stored snapshot.
    #[test]
    fn test_basic_stripping() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "rules": {
                    "remove_bad_headers": {
                        "type": "redact_pair",
                        "keyPattern": "(?i)cookie|secret[-_]?key"
                    }
                },
                "applications": {
                    "$string": ["@ip"],
                    "$object.**": ["remove_bad_headers"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            logentry: Annotated::new(LogEntry {
                formatted: Annotated::new("Hello world!".to_owned().into()),
                ..Default::default()
            }),
            request: Annotated::new(Request {
                env: {
                    let mut rv = Object::new();
                    rv.insert(
                        "SECRET_KEY".to_owned(),
                        Annotated::new(Value::String("134141231231231231231312".into())),
                    );
                    Annotated::new(rv)
                },
                headers: {
                    let rv = vec![
                        Annotated::new((
                            Annotated::new("Cookie".to_owned().into()),
                            Annotated::new("super secret".to_owned().into()),
                        )),
                        Annotated::new((
                            Annotated::new("X-Forwarded-For".to_owned().into()),
                            Annotated::new("127.0.0.1".to_owned().into()),
                        )),
                    ];
                    Annotated::new(Headers(PairList(rv)))
                },
                ..Default::default()
            }),
            tags: Annotated::new(Tags(
                vec![Annotated::new(TagEntry(
                    Annotated::new("forwarded_for".to_owned()),
                    Annotated::new("127.0.0.1".to_owned()),
                ))]
                .into(),
            )),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
770
    /// Applies `@anything` to the `$object` selector on an event with a
    /// single `extra` entry and compares the result with the stored snapshot.
    #[test]
    fn test_redact_containers() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$object": ["@anything"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "foo".to_owned(),
                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
800
    /// Applies a custom `pattern` rule (`foo`, replaced with the text `asd`)
    /// to all strings, using an `extra` value of `foobar`, and compares the
    /// result with the stored snapshot.
    #[test]
    fn test_redact_custom_pattern() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["myrule"]
                },
                "rules": {
                    "myrule": {
                        "type": "pattern",
                        "pattern": "foo",
                        "redaction": {
                            "method": "replace",
                            "text": "asd"
                        }
                    }
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_owned(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
840
    /// Checks a "negative" pattern: the alternation lists the allowed strings
    /// first and captures everything else in group 1, and only group 1 is
    /// replaced (`replaceGroups: [1]`) using the `mask` method.
    ///
    /// The inline snapshot expects the allowed strings ("1" and "3") to stay
    /// untouched and the other two to be fully masked.
    #[test]
    fn test_redact_custom_negative_pattern() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["myrule"]
                },
                "rules": {
                    "myrule": {
                        "type": "pattern",
                        "pattern": "the good string|.*OK.*|(.*)",
                        "replaceGroups": [1],
                        "redaction": {
                            "method": "mask"
                        }
                    }
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::<Event>::from_json(
            r#"{
                "extra": {
                    "1": "the good string",
                    "2": "a bad string",
                    "3": "another OK string",
                    "4": "another bad one"
                }
            }"#,
        )
        .unwrap();

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
        {
          "1": "the good string",
          "2": "************",
          "3": "another OK string",
          "4": "***************",
          "_meta": {
            "2": {
              "": {
                "rem": [
                  [
                    "myrule",
                    "m",
                    0,
                    12
                  ]
                ],
                "len": 12
              }
            },
            "4": {
              "": {
                "rem": [
                  [
                    "myrule",
                    "m",
                    0,
                    15
                  ]
                ],
                "len": 15
              }
            }
          }
        }
        "#);
    }
915
    /// Applies `@anything:remove` to every path (`**`) on an event that only
    /// has one `extra` entry and compares the result with the stored snapshot.
    ///
    /// NOTE(review): going by the test name, the snapshot presumably shows
    /// that no remarks are created for fields that were never set — confirm
    /// against the snapshot.
    #[test]
    fn test_no_field_upsert() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "**": ["@anything:remove"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_owned(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
945
    /// Applies `@anything:hash` to all strings (`$string`) on an event with a
    /// single string `extra` entry and compares the result with the stored
    /// snapshot.
    #[test]
    fn test_anything_hash_on_string() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["@anything:hash"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_owned(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
975
    /// Applies `@anything:hash` to containers (`$object`) instead of strings,
    /// using the same event as `test_anything_hash_on_string`, and compares
    /// the result with the stored snapshot.
    #[test]
    fn test_anything_hash_on_container() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$object": ["@anything:hash"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "myvalue".to_owned(),
                    Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
1005
    /// Runs default data scrubbing over request headers and `extra` entries
    /// in which the word "token" appears either in the key or only in the
    /// value, and compares the result with the stored snapshot.
    ///
    /// NOTE(review): going by the test name, "token" is presumably expected
    /// to trigger scrubbing only when it occurs in a key — confirm against
    /// the snapshot.
    #[test]
    fn test_only_match_token_on_keys() {
        let mut data = Event::from_value(
            json!({
                "request": {
                    "headers": [
                        ["X-Token", "oof this is very sensitive"],
                        ["Token", "also bad"],
                    ]
                },
                "extra": {
                    "url": "foo.bar/endpoint?token=sensitive",
                    "url2": "foo.bar/endpoint?token_foobar=sensitive",
                    "aaa": "token:12345",
                    "foo-token-bar": "sensitive",
                    "llm": "token count",
                },
            })
            .into(),
        );

        let scrubbing_config = DataScrubbingConfig {
            scrub_data: true,
            scrub_ip_addresses: true,
            scrub_defaults: true,
            ..Default::default()
        };

        let pii_config = to_pii_config(&scrubbing_config).unwrap();
        let mut pii_processor = PiiProcessor::new(pii_config.compiled());

        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();

        assert_annotated_snapshot!(&data);
    }
1041
    /// Runs default data scrubbing (including IP scrubbing) over two request
    /// headers that both contain an IP address, `User-Agent` and
    /// `X-Client-Ip`, and compares the result with the stored snapshot.
    ///
    /// NOTE(review): going by the test name, the `User-Agent` value is
    /// presumably expected to be left alone — confirm against the snapshot.
    #[test]
    fn test_ignore_user_agent_ip_scrubbing() {
        let mut data = Event::from_value(
            json!({
                "request": {
                    "headers": [
                        ["User-Agent", "127.0.0.1"],
                        ["X-Client-Ip", "10.0.0.1"]
                    ]
                },
            })
            .into(),
        );

        let scrubbing_config = DataScrubbingConfig {
            scrub_data: true,
            scrub_ip_addresses: true,
            scrub_defaults: true,
            ..Default::default()
        };

        let pii_config = to_pii_config(&scrubbing_config).unwrap();
        let mut pii_processor = PiiProcessor::new(pii_config.compiled());

        process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();

        assert_annotated_snapshot!(&data);
    }
1070
    /// Applies `@anything:remove` to `code_file` and `debug_file` of all
    /// debug images, using one symbolic image with a Windows-style code file
    /// path and a debug file without any directory part. The result is
    /// compared with the stored snapshot.
    #[test]
    fn test_remove_debugmeta_path() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "debug_meta.images.*.code_file": ["@anything:remove"],
                    "debug_meta.images.*.debug_file": ["@anything:remove"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            debug_meta: Annotated::new(DebugMeta {
                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
                    NativeDebugImage {
                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
                        debug_id: Annotated::new(
                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
                        ),
                        debug_file: Annotated::new("wntdll.pdb".into()),
                        debug_checksum: Annotated::empty(),
                        arch: Annotated::new("arm64".to_owned()),
                        image_addr: Annotated::new(Addr(0)),
                        image_size: Annotated::new(4096),
                        image_vmaddr: Annotated::new(Addr(32768)),
                        other: {
                            let mut map = Object::new();
                            map.insert(
                                "other".to_owned(),
                                Annotated::new(Value::String("value".to_owned())),
                            );
                            map
                        },
                    },
                )))]),
                ..Default::default()
            }),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
1119
    /// Same setup as `test_remove_debugmeta_path`, but with
    /// `@anything:replace` applied to `code_file` and `debug_file`. The
    /// result is compared with the stored snapshot.
    #[test]
    fn test_replace_debugmeta_path() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "debug_meta.images.*.code_file": ["@anything:replace"],
                    "debug_meta.images.*.debug_file": ["@anything:replace"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            debug_meta: Annotated::new(DebugMeta {
                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
                    NativeDebugImage {
                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
                        debug_id: Annotated::new(
                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
                        ),
                        debug_file: Annotated::new("wntdll.pdb".into()),
                        debug_checksum: Annotated::empty(),
                        arch: Annotated::new("arm64".to_owned()),
                        image_addr: Annotated::new(Addr(0)),
                        image_size: Annotated::new(4096),
                        image_vmaddr: Annotated::new(Addr(32768)),
                        other: {
                            let mut map = Object::new();
                            map.insert(
                                "other".to_owned(),
                                Annotated::new(Value::String("value".to_owned())),
                            );
                            map
                        },
                    },
                )))]),
                ..Default::default()
            }),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
1168
    /// Same setup as `test_remove_debugmeta_path`, but with `@anything:hash`
    /// applied to `code_file` and `debug_file`. The result is compared with
    /// the stored snapshot.
    #[test]
    fn test_hash_debugmeta_path() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "debug_meta.images.*.code_file": ["@anything:hash"],
                    "debug_meta.images.*.debug_file": ["@anything:hash"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            debug_meta: Annotated::new(DebugMeta {
                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
                    NativeDebugImage {
                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
                        debug_id: Annotated::new(
                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
                        ),
                        debug_file: Annotated::new("wntdll.pdb".into()),
                        debug_checksum: Annotated::empty(),
                        arch: Annotated::new("arm64".to_owned()),
                        image_addr: Annotated::new(Addr(0)),
                        image_size: Annotated::new(4096),
                        image_vmaddr: Annotated::new(Addr(32768)),
                        other: {
                            let mut map = Object::new();
                            map.insert(
                                "other".to_owned(),
                                Annotated::new(Value::String("value".to_owned())),
                            );
                            map
                        },
                    },
                )))]),
                ..Default::default()
            }),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
1217
    /// Applies `@anything:remove` through several broad selectors
    /// (`$string`, `**`, `debug_meta.**`, and a conjunction using a deep
    /// wildcard) to the same debug image as `test_remove_debugmeta_path`, and
    /// compares the result with the stored snapshot.
    ///
    /// NOTE(review): going by the test name, none of these wildcard selectors
    /// is presumably expected to reach the image paths — confirm against the
    /// snapshot.
    #[test]
    fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["@anything:remove"],
                    "**": ["@anything:remove"],
                    "debug_meta.**": ["@anything:remove"],
                    "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            debug_meta: Annotated::new(DebugMeta {
                images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
                    NativeDebugImage {
                        code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
                        code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
                        debug_id: Annotated::new(
                            "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
                        ),
                        debug_file: Annotated::new("wntdll.pdb".into()),
                        debug_checksum: Annotated::empty(),
                        arch: Annotated::new("arm64".to_owned()),
                        image_addr: Annotated::new(Addr(0)),
                        image_size: Annotated::new(4096),
                        image_vmaddr: Annotated::new(Addr(32768)),
                        other: {
                            let mut map = Object::new();
                            map.insert(
                                "other".to_owned(),
                                Annotated::new(Value::String("value".to_owned())),
                            );
                            map
                        },
                    },
                )))]),
                ..Default::default()
            }),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
1268
    /// Checks selectors with quoted keys: the selector addresses one `extra`
    /// key containing punctuation, non-ASCII characters and single quotes
    /// (written doubled inside the quoted selector), while a second, similar
    /// key is not addressed. The result is compared with the stored snapshot.
    #[test]
    fn test_quoted_keys() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::new(Event {
            extra: {
                let mut map = Object::new();
                map.insert(
                    "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
                    Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
                );
                map.insert(
                    "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
                    Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
                );
                Annotated::new(map)
            },
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event);
    }
1302
    /// Asserts that `logentry.formatted` can be addressed by each of several
    /// selectors: for every selector, `@anything:remove` must leave the
    /// formatted message without a value.
    #[test]
    fn test_logentry_value_types() {
        for formatted_selector in &[
            "$logentry.formatted",
            "$message",
            "$logentry.formatted && $message",
            "$string",
        ] {
            // Build a config that applies the rule to this selector only.
            let config = serde_json::from_str::<PiiConfig>(&format!(
                r##"
                {{
                    "applications": {{
                        "{formatted_selector}": ["@anything:remove"]
                    }}
                }}
                "##
            ))
            .unwrap();

            let mut event = Annotated::new(Event {
                logentry: Annotated::new(LogEntry {
                    formatted: Annotated::new("Hello world!".to_owned().into()),
                    ..Default::default()
                }),
                ..Default::default()
            });

            let mut processor = PiiProcessor::new(config.compiled());
            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
            assert!(
                event
                    .value()
                    .unwrap()
                    .logentry
                    .value()
                    .unwrap()
                    .formatted
                    .value()
                    .is_none()
            );
        }
    }
1346
/// With data, default and IP scrubbing enabled, a formatted log message that
/// contains an email address and a card number keeps its surrounding text;
/// only the two sensitive parts are replaced, as the inline snapshot shows.
#[test]
fn test_logentry_formatted_never_fully_filtered() {
    let scrubbing = crate::DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        scrub_ip_addresses: true,
        ..Default::default()
    };
    let pii_config = crate::convert::to_pii_config(&scrubbing).unwrap();

    let logentry = LogEntry {
        formatted: Annotated::new(
            "User john.doe@company.com failed login with card 4111-1111-1111-1111"
                .to_owned()
                .into(),
        ),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        logentry: Annotated::new(logentry),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event, @r#"
    {
      "logentry": {
        "formatted": "User [email] failed login with card [creditcard]"
      },
      "_meta": {
        "logentry": {
          "formatted": {
            "": {
              "rem": [
                [
                  "@email:replace",
                  "s",
                  5,
                  12
                ],
                [
                  "@creditcard:replace",
                  "s",
                  36,
                  48
                ]
              ],
              "len": 68
            }
          }
        }
      }
    }
    "#);
}
1404
/// With data and default scrubbing enabled, a bearer token embedded in the
/// formatted log message is replaced while the rest of the message is kept
/// (see the inline snapshot for the exact output and remark).
#[test]
fn test_logentry_formatted_bearer_token_scrubbing() {
    let scrubbing = crate::DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let pii_config = crate::convert::to_pii_config(&scrubbing).unwrap();

    let logentry = LogEntry {
        formatted: Annotated::new(
            "API request failed with Bearer ABC123XYZ789TOKEN and other data"
                .to_owned()
                .into(),
        ),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        logentry: Annotated::new(logentry),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event, @r#"
    {
      "logentry": {
        "formatted": "API request failed with Bearer [token] and other data"
      },
      "_meta": {
        "logentry": {
          "formatted": {
            "": {
              "rem": [
                [
                  "@bearer:replace",
                  "s",
                  24,
                  38
                ]
              ],
              "len": 63
            }
          }
        }
      }
    }
    "#);
}
1454
/// Runs the processor with a default `PiiConfig` over a formatted message
/// that merely mentions the word "password"; the message must come out
/// unchanged and without any `_meta`.
#[test]
fn test_logentry_formatted_password_word_not_scrubbed() {
    let pii_config = PiiConfig::default();

    let logentry = LogEntry {
        formatted: Annotated::new(
            "User password is secret123 for authentication"
                .to_owned()
                .into(),
        ),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        logentry: Annotated::new(logentry),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(event, @r#"
    {
      "logentry": {
        "formatted": "User password is secret123 for authentication"
      }
    }
    "#);
}
1480
/// Hashing `$user.ip_address` with `@ip:hash` on a user without an `id`:
/// afterwards `ip_address` has no value and `id` carries the hash.
#[test]
fn test_ip_address_hashing() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
        {
            "applications": {
                "$user.ip_address": ["@ip:hash"]
            }
        }
        "#,
    )
    .unwrap();

    let user = User {
        ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        user: Annotated::new(user),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let scrubbed_user = event.value().unwrap().user.value().unwrap();

    assert!(scrubbed_user.ip_address.value().is_none());

    let hashed_id = scrubbed_user.id.value().unwrap().as_str();
    assert_eq!(hashed_id, "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741");
}
1514
/// Hashing `$user.ip_address` with `@ip:hash` on a user that already has an
/// `id`: the hash ends up in `ip_address` and the existing `id` is untouched.
#[test]
fn test_ip_address_hashing_does_not_overwrite_id() {
    let pii_config: PiiConfig = serde_json::from_str(
        r#"
        {
            "applications": {
                "$user.ip_address": ["@ip:hash"]
            }
        }
        "#,
    )
    .unwrap();

    let user = User {
        id: Annotated::new("123".to_owned().into()),
        ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        user: Annotated::new(user),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let scrubbed_user = event.value().unwrap().user.value().unwrap();

    let hashed_ip = scrubbed_user.ip_address.value().unwrap().as_str();
    assert_eq!(hashed_ip, "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741");

    let kept_id = scrubbed_user.id.value().unwrap().as_str();
    assert_eq!(kept_id, "123");
}
1550
/// Feeding a chunk list that is already a single `[ip]` redaction through
/// `apply_regex_to_chunks` with `ReplaceBehavior::Value` must return the
/// same chunks.
#[test]
fn test_replace_replaced_text() {
    let ip_rule = RuleRef {
        id: "@ip:replace".into(),
        origin: "@ip".into(),
        ty: RuleType::Ip,
        redaction: Redaction::Replace(ReplaceRedaction {
            text: "[ip]".into(),
        }),
    };
    let already_redacted = vec![Chunk::Redaction {
        text: "[ip]".into(),
        rule_id: "@ip".into(),
        ty: RemarkType::Substituted,
    }];
    let match_all = Regex::new(r#".*"#).unwrap();

    let output = apply_regex_to_chunks(
        already_redacted.clone(),
        &ip_rule,
        &match_all,
        ReplaceBehavior::Value,
    );
    assert_eq!(already_redacted, output);
}
1574
/// Feeding a chunk list that is already a single `[Filtered]` redaction
/// through `apply_regex_to_chunks` with an `@anything:filter` rule and
/// group-0 replacement must return the same chunks.
#[test]
fn test_replace_replaced_text_anything() {
    let anything_rule = RuleRef {
        id: "@anything:filter".into(),
        origin: "@anything:filter".into(),
        ty: RuleType::Anything,
        redaction: Redaction::Replace(ReplaceRedaction {
            text: "[Filtered]".into(),
        }),
    };
    let already_redacted = vec![Chunk::Redaction {
        text: "[Filtered]".into(),
        rule_id: "@password:filter".into(),
        ty: RemarkType::Substituted,
    }];
    let match_all = Regex::new(r#".*"#).unwrap();

    let output = apply_regex_to_chunks(
        already_redacted.clone(),
        &anything_rule,
        &match_all,
        ReplaceBehavior::Groups(smallvec::smallvec![0]),
    );
    assert_eq!(already_redacted, output);
}
1598
/// Runs default data scrubbing over a trace context whose `data` nests a
/// `password` entry under `previousRoute.params`; the result is compared
/// against the stored snapshot.
#[test]
fn test_trace_route_params_scrubbed() {
    let mut trace_context = Annotated::<TraceContext>::from_json(
        r#"
        {
            "type": "trace",
            "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
            "span_id": "fa90fdead5f74052",
            "data": {
                "previousRoute": {
                    "params": {
                        "password": "test"
                    }
                }
            }
        }
        "#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut trace_context, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(trace_context);
}
1635
/// Runs default data scrubbing over a span whose `data.http.query` is the
/// harmless string `dance=true`; the result is compared against the stored
/// snapshot.
#[test]
fn test_scrub_span_data_http_not_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
            "data": {
                "http": {
                    "query": "dance=true"
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(span);
}
1660
/// Runs default data scrubbing over a span whose `data.http.query` and
/// `data.http.fragment` are strings containing a `ccnumber`; the result is
/// compared against the stored snapshot.
#[test]
fn test_scrub_span_data_http_strings_are_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
            "data": {
                "http": {
                    "query": "ccnumber=5105105105105100&process_id=123",
                    "fragment": "ccnumber=5105105105105100,process_id=123"
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(span);
}
1686
/// Runs default data scrubbing over a span whose `data.http.query` and
/// `data.http.fragment` are objects containing a `ccnumber`; the result is
/// compared against the stored snapshot.
#[test]
fn test_scrub_span_data_http_objects_are_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
            "data": {
                "http": {
                    "query": {
                        "ccnumber": "5105105105105100",
                        "process_id": "123"
                    },
                    "fragment": {
                        "ccnumber": "5105105105105100",
                        "process_id": "123"
                    }
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(span);
}
1718
/// Runs default data scrubbing over a span whose `data` holds arbitrary
/// (`untyped`, `more_untyped`) keys with card numbers; the result is
/// compared against the stored snapshot.
#[test]
fn test_scrub_span_data_untyped_props_are_scrubbed() {
    let mut span = Annotated::<Span>::from_json(
        r#"{
            "data": {
                "untyped": "ccnumber=5105105105105100",
                "more_untyped": {
                    "typed": "no",
                    "scrubbed": "yes",
                    "ccnumber": "5105105105105100"
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(span);
}
1746
/// Default data scrubbing must leave `data["code.filepath"]` intact even
/// though the path contains the word "authentication".
#[test]
fn test_span_data_pii() {
    let payload = json!({
        "data": {
            "code.filepath": "src/sentry/api/authentication.py",
        }
    });
    let mut span = Span::from_value(payload.into());

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    processor::process_value(&mut span, &mut scrubber, ProcessingState::root()).unwrap();

    let filepath = get_value!(span.data.code_filepath!).as_str();
    assert_eq!(filepath, Some("src/sentry/api/authentication.py"));
}
1772
/// An explicit `csp.source_file` selector with `@anything:filter` replaces
/// the CSP source file with `[Filtered]`.
#[test]
fn test_csp_source_file_pii() {
    let payload = json!({
        "csp": {
            "source_file": "authentication.js",
        }
    });
    let mut event = Event::from_value(payload.into());

    let pii_config: PiiConfig = serde_json::from_str(
        r#"
        {
            "applications": {
                "csp.source_file": ["@anything:filter"]
            }
        }
        "#,
    )
    .unwrap();

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    processor::process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let source_file = get_value!(event.csp.source_file!).as_str();
    assert_eq!(source_file, "[Filtered]");
}
1799
/// Runs default data scrubbing over a breadcrumb whose `data.http.query` is
/// the harmless string `dance=true`; the result is compared against the
/// stored snapshot.
#[test]
fn test_scrub_breadcrumb_data_http_not_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
            "data": {
                "http": {
                    "query": "dance=true"
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(breadcrumb);
}
1823
/// Runs default data scrubbing over a breadcrumb whose `data.http.query` and
/// `data.http.fragment` are strings containing a `ccnumber`; the result is
/// compared against the stored snapshot.
#[test]
fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
            "data": {
                "http": {
                    "query": "ccnumber=5105105105105100&process_id=123",
                    "fragment": "ccnumber=5105105105105100,process_id=123"
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(breadcrumb);
}
1848
/// Runs default data scrubbing over a breadcrumb whose `data.http.query` and
/// `data.http.fragment` are objects containing a `ccnumber`; the result is
/// compared against the stored snapshot.
#[test]
fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
            "data": {
                "http": {
                    "query": {
                        "ccnumber": "5105105105105100",
                        "process_id": "123"
                    },
                    "fragment": {
                        "ccnumber": "5105105105105100",
                        "process_id": "123"
                    }
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(breadcrumb);
}
1880
/// Runs default data scrubbing over a breadcrumb whose `data` holds
/// arbitrary (`untyped`, `more_untyped`) keys with card numbers; the result
/// is compared against the stored snapshot.
#[test]
fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
    let mut breadcrumb = Annotated::<Breadcrumb>::from_json(
        r#"{
            "data": {
                "untyped": "ccnumber=5105105105105100",
                "more_untyped": {
                    "typed": "no",
                    "scrubbed": "yes",
                    "ccnumber": "5105105105105100"
                }
            }
        }"#,
    )
    .unwrap();

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = scrubbing.pii_config().as_ref().unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut breadcrumb, &mut scrubber, ProcessingState::root()).unwrap();
    assert_annotated_snapshot!(breadcrumb);
}
1907
/// Calls `scrub_graphql` on an event whose request targets `graphql` and
/// carries both a query and variables, alongside a response context with
/// nested data. The outcome is compared against the stored debug snapshot.
#[test]
fn test_scrub_graphql_response_data_with_variables() {
    let payload = json!({
        "request": {
            "data": {
                "query": "{\n viewer {\n login\n }\n}",
                "variables": {
                    "login": "foo"
                }
            },
            "api_target": "graphql"
        },
        "contexts": {
            "response": {
                "type": "response",
                "data": {
                    "data": {
                        "viewer": {
                            "login": "foo"
                        }
                    }
                }
            }
        }
    });
    let mut data = Event::from_value(payload.into());

    let event = data.value_mut().as_mut().unwrap();
    scrub_graphql(event);

    assert_debug_snapshot!(&data);
}
1941
/// Calls `scrub_graphql` on an event whose request targets `graphql` and
/// carries a query but no variables, alongside a response context with
/// nested data. The outcome is compared against the stored debug snapshot.
#[test]
fn test_scrub_graphql_response_data_without_variables() {
    let payload = json!({
        "request": {
            "data": {
                "query": "{\n viewer {\n login\n }\n}"
            },
            "api_target": "graphql"
        },
        "contexts": {
            "response": {
                "type": "response",
                "data": {
                    "data": {
                        "viewer": {
                            "login": "foo"
                        }
                    }
                }
            }
        }
    });
    let mut data = Event::from_value(payload.into());

    let event = data.value_mut().as_mut().unwrap();
    scrub_graphql(event);

    assert_debug_snapshot!(&data);
}
1971
/// Runs the regular PII processor (data, IP and default scrubbing enabled)
/// over an event that looks like a GraphQL exchange but has no `api_target`
/// on the request. The outcome is compared against the stored debug snapshot.
#[test]
fn test_does_not_scrub_if_no_graphql() {
    let payload = json!({
        "request": {
            "data": {
                "query": "{\n viewer {\n login\n }\n}",
                "variables": {
                    "login": "foo"
                }
            },
        },
        "contexts": {
            "response": {
                "type": "response",
                "data": {
                    "data": {
                        "viewer": {
                            "login": "foo"
                        }
                    }
                }
            }
        }
    });
    let mut data = Event::from_value(payload.into());

    let scrubbing = DataScrubbingConfig {
        scrub_data: true,
        scrub_ip_addresses: true,
        scrub_defaults: true,
        ..Default::default()
    };
    let compiled_from = to_pii_config(&scrubbing).unwrap();
    let mut scrubber = PiiProcessor::new(compiled_from.compiled());

    process_value(&mut data, &mut scrubber, ProcessingState::root()).unwrap();

    assert_debug_snapshot!(&data);
}
2014
/// With `$string` mapped to `@anything:remove`, the single string entry in
/// `logentry.params` is removed, leaving only its meta with a `Removed`
/// remark (see the inline snapshot).
#[test]
fn test_logentry_params_scrubbed() {
    let pii_config: PiiConfig = serde_json::from_str(
        r##"
        {
            "applications": {
                "$string": ["@anything:remove"]
            }
        }
        "##,
    )
    .unwrap();

    let param_values = vec![Annotated::new(Value::String("12345".to_owned()))];
    let logentry = LogEntry {
        message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
        formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
        params: Annotated::new(Value::Array(param_values)),
        ..Default::default()
    };
    let mut event = Annotated::new(Event {
        logentry: Annotated::new(logentry),
        ..Default::default()
    });

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let params = get_value!(event.logentry.params!);
    assert_debug_snapshot!(params, @r###"
    Array(
        [
            Meta {
                remarks: [
                    Remark {
                        ty: Removed,
                        rule_id: "@anything:remove",
                        range: None,
                    },
                ],
                errors: [],
                original_length: None,
                original_value: None,
            },
        ],
    )
    "###);
}
2063
/// Table-driven check of `is_pairlist`: each JSON array literal is parsed
/// and the predicate's answer is compared with the expected flag. The
/// failing case is reported through the assertion message.
#[test]
fn test_is_pairlist() {
    let cases = [
        (r#"[]"#, false),
        (r#"["foo"]"#, false),
        (r#"["foo", 123]"#, false),
        (r#"[[1, "foo"]]"#, false),
        (r#"[[["too_nested", 123]]]"#, false),
        (r#"[["foo", "bar"], [1, "foo"]]"#, false),
        (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
        (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
        (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
        (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
        (r#"[["foo", 123]]"#, true),
        (r#"[["foo", "bar"]]"#, true),
        (
            r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
            true,
        ),
    ];

    for (case, expected) in cases {
        let parsed = Annotated::<Value>::from_json(case).unwrap();
        // Every case is an array literal; anything else is a broken fixture.
        let mut items = match parsed {
            Annotated(Some(Value::Array(items)), _) => items,
            _ => panic!(),
        };
        assert_eq!(is_pairlist(&mut items), expected, "{case}");
    }
}
2091
/// Scrubs the value of a `["authorization", ...]` pair stored as a nested
/// array in frame vars, addressed by two different path selectors (by key
/// name and by numeric indices). Both configs are applied in turn to the
/// same event and must yield the same inline snapshot.
#[test]
fn test_tuple_array_scrubbed_with_path_selector() {
    let by_key_name = r##"
        {
            "applications": {
                "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
            }
        }
        "##;
    let by_index = r##"
        {
            "applications": {
                "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
            }
        }
        "##;

    let payload = serde_json::json!({
        "message": "hi",
        "exception": {
            "values": [
                {
                    "type": "BrokenException",
                    "value": "Something failed",
                    "stacktrace": {
                        "frames": [
                            {
                                "vars": {
                                    "headers": [
                                        ["authorization", "Bearer abc123"]
                                    ]
                                }
                            }
                        ]
                    }
                }
            ]
        }
    });
    let mut event = Event::from_value(payload.into());

    for raw_config in [by_key_name, by_index] {
        let pii_config = serde_json::from_str::<PiiConfig>(raw_config).unwrap();
        let mut scrubber = PiiProcessor::new(pii_config.compiled());
        process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

        let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();

        allow_duplicates!(assert_debug_snapshot!(vars, @r###"
        FrameVars(
            {
                "headers": Array(
                    [
                        Array(
                            [
                                String(
                                    "authorization",
                                ),
                                Annotated(
                                    String(
                                        "[Filtered]",
                                    ),
                                    Meta {
                                        remarks: [
                                            Remark {
                                                ty: Substituted,
                                                rule_id: "@anything:replace",
                                                range: Some(
                                                    (
                                                        0,
                                                        10,
                                                    ),
                                                ),
                                            },
                                        ],
                                        errors: [],
                                        original_length: Some(
                                            13,
                                        ),
                                        original_value: None,
                                    },
                                ),
                            ],
                        ),
                    ],
                ),
            },
        )
        "###));
    }
}
2192
/// With `$string` mapped to `@password:remove`, the value of an
/// `["authorization", ...]` pair stored as a nested array in frame vars is
/// removed, leaving a `Removed` remark in its place (see the inline snapshot).
#[test]
fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
    let pii_config: PiiConfig = serde_json::from_str(
        r##"
        {
            "applications": {
                "$string": ["@password:remove"]
            }
        }
        "##,
    )
    .unwrap();

    let payload = serde_json::json!({
        "message": "hi",
        "exception": {
            "values": [
                {
                    "type": "BrokenException",
                    "value": "Something failed",
                    "stacktrace": {
                        "frames": [
                            {
                                "vars": {
                                    "headers": [
                                        ["authorization", "abc123"]
                                    ]
                                }
                            }
                        ]
                    }
                }
            ]
        }
    });
    let mut event = Event::from_value(payload.into());

    let mut scrubber = PiiProcessor::new(pii_config.compiled());
    process_value(&mut event, &mut scrubber, ProcessingState::root()).unwrap();

    let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();

    assert_debug_snapshot!(vars, @r###"
    FrameVars(
        {
            "headers": Array(
                [
                    Array(
                        [
                            String(
                                "authorization",
                            ),
                            Meta {
                                remarks: [
                                    Remark {
                                        ty: Removed,
                                        rule_id: "@password:remove",
                                        range: None,
                                    },
                                ],
                                errors: [],
                                original_length: None,
                                original_value: None,
                            },
                        ],
                    ),
                ],
            ),
        },
    )
    "###);
}
2268}