1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8 self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9 ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12 AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Selects how [`PiiProcessor`] walks a collection of attributes.
#[derive(Debug, Clone, Copy)]
pub enum AttributeMode {
    /// Processes the attribute collection through its regular child values.
    ///
    /// This is the mode a processor created with [`PiiProcessor::new`] starts in.
    Object,
    /// Skips the attribute wrapper and processes each attribute's inner value
    /// directly under the attribute's key.
    ValueOnly,
}
32
/// A [`Processor`] that applies the rules of a compiled PII config to the values it visits.
pub struct PiiProcessor<'a> {
    /// How attribute collections are traversed; see [`AttributeMode`].
    attribute_mode: AttributeMode,
    /// Borrowed compiled config; its `applications` pair selectors with the rules to apply.
    compiled_config: &'a CompiledPiiConfig,
}
39
40impl<'a> PiiProcessor<'a> {
41 pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43 PiiProcessor {
46 compiled_config,
47 attribute_mode: AttributeMode::Object,
48 }
49 }
50
51 pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53 self.attribute_mode = attribute_mode;
54 self
55 }
56
57 fn apply_all_rules(
58 &self,
59 meta: &mut Meta,
60 state: &ProcessingState<'_>,
61 mut value: Option<&mut String>,
62 ) -> ProcessingResult {
63 let pii = state.pii();
64 if pii == Pii::False {
65 return Ok(());
66 }
67
68 for (selector, rules) in self.compiled_config.applications.iter() {
69 if selector.matches_path(&state.path()) {
70 #[allow(clippy::needless_option_as_deref)]
71 for rule in rules {
72 let reborrowed_value = value.as_deref_mut();
73 apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74 }
75 }
76 }
77
78 Ok(())
79 }
80}
81
82impl Processor for PiiProcessor<'_> {
83 fn before_process<T: ProcessValue>(
84 &mut self,
85 value: Option<&T>,
86 meta: &mut Meta,
87 state: &ProcessingState<'_>,
88 ) -> ProcessingResult {
89 if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90 if let Some(parent) = state.iter().next() {
94 let path = state.path();
95 let new_state = parent.enter_borrowed(
96 path.key().unwrap_or(""),
97 Some(Cow::Borrowed(state.attrs())),
98 enum_set!(ValueType::String),
99 );
100
101 if self
102 .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103 .is_err()
104 {
105 meta.set_original_value(Option::<String>::None);
107 }
108 }
109 }
110
111 if state.value_type().contains(ValueType::Boolean)
113 || state.value_type().contains(ValueType::String)
114 {
115 return Ok(());
116 }
117
118 if value.is_none() {
119 return Ok(());
120 }
121
122 self.apply_all_rules(meta, state, None)
124 }
125
126 fn process_array<T>(
127 &mut self,
128 array: &mut Array<T>,
129 _meta: &mut Meta,
130 state: &ProcessingState<'_>,
131 ) -> ProcessingResult
132 where
133 T: ProcessValue,
134 {
135 if is_pairlist(array) {
136 for annotated in array {
137 let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139 if let Some(Value::Array(pair)) = mapped.value_mut() {
140 let mut value = mem::take(&mut pair[1]);
141 let value_type = ValueType::for_field(&value);
142
143 if let Some(key_name) = &pair[0].as_str() {
144 let key_state =
147 state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148 process_value(&mut value, self, &key_state)?;
151 }
152
153 pair[1] = value;
155 }
156
157 *annotated = T::from_value(mapped);
159 }
160
161 Ok(())
162 } else {
163 array.process_child_values(self, state)
165 }
166 }
167
168 fn process_string(
169 &mut self,
170 value: &mut String,
171 meta: &mut Meta,
172 state: &ProcessingState<'_>,
173 ) -> ProcessingResult {
174 if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175 return Ok(());
176 }
177
178 self.apply_all_rules(meta, state, Some(value))
181 }
182
183 fn process_native_image_path(
184 &mut self,
185 NativeImagePath(value): &mut NativeImagePath,
186 meta: &mut Meta,
187 state: &ProcessingState<'_>,
188 ) -> ProcessingResult {
189 if let Some(index) = value.rfind(['/', '\\']) {
200 let basename = value.split_off(index);
201 match self.process_string(value, meta, state) {
202 Ok(()) => value.push_str(&basename),
203 Err(
204 ProcessingAction::DeleteValueHard
205 | ProcessingAction::DeleteValueWithRemark(_)
206 | ProcessingAction::DeleteValueSoft,
207 ) => {
208 basename[1..].clone_into(value);
209 }
210 Err(ProcessingAction::InvalidTransaction(x)) => {
211 return Err(ProcessingAction::InvalidTransaction(x));
212 }
213 }
214 }
215
216 Ok(())
217 }
218
219 fn process_pairlist<T: ProcessValue + AsPair>(
220 &mut self,
221 value: &mut PairList<T>,
222 _meta: &mut Meta,
223 state: &ProcessingState,
224 ) -> ProcessingResult {
225 utils::process_pairlist(self, value, state)
226 }
227
228 fn process_attributes(
229 &mut self,
230 value: &mut relay_event_schema::protocol::Attributes,
231 _meta: &mut Meta,
232 state: &ProcessingState,
233 ) -> ProcessingResult {
234 match self.attribute_mode {
235 AttributeMode::Object => value.process_child_values(self, state),
237 AttributeMode::ValueOnly => {
239 for (key, attribute) in value.0.iter_mut() {
240 let Some(attribute) = attribute.value_mut() else {
241 continue;
242 };
243
244 let attrs = FieldAttrs::new()
249 .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
250 let inner_value = &mut attribute.value.value;
251 let inner_value_type = ValueType::for_field(inner_value);
252 let entered =
253 state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
254
255 processor::process_value(inner_value, self, &entered)?;
256 self.process_other(&mut attribute.other, state)?;
257 }
258 Ok(())
259 }
260 }
261 }
262
263 fn process_user(
264 &mut self,
265 user: &mut User,
266 _meta: &mut Meta,
267 state: &ProcessingState<'_>,
268 ) -> ProcessingResult {
269 let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
270
271 user.process_child_values(self, state)?;
273
274 let has_other_fields = user.id.value().is_some()
275 || user.username.value().is_some()
276 || user.email.value().is_some();
277
278 let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
279
280 if ip_was_valid && !has_other_fields && !ip_is_still_valid {
290 user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
291 user.ip_address.meta_mut().add_remark(Remark::new(
292 RemarkType::Removed,
293 "pii:ip_address".to_owned(),
294 ));
295 }
296
297 Ok(())
298 }
299
300 fn process_replay(
302 &mut self,
303 replay: &mut Replay,
304 _meta: &mut Meta,
305 state: &ProcessingState<'_>,
306 ) -> ProcessingResult {
307 replay.process_child_values(self, state)?;
308 Ok(())
309 }
310}
311
/// Visitor that records whether a value looks like a `[key, value]` pair with a string key.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited root value is an array with exactly two elements.
    is_pair: bool,
    /// Set when the first element of that array was visited as a string.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Returns `true` only if both observations were made: a two-element array whose first
    /// element is a string.
    fn is_pair_array(&self) -> bool {
        let Self {
            is_pair,
            has_string_key,
        } = *self;
        is_pair && has_string_key
    }
}
325
326impl Processor for PairListProcessor {
327 fn process_array<T>(
328 &mut self,
329 value: &mut Array<T>,
330 _meta: &mut Meta,
331 state: &ProcessingState<'_>,
332 ) -> ProcessingResult
333 where
334 T: ProcessValue,
335 {
336 self.is_pair = state.depth() == 0 && value.len() == 2;
337 if self.is_pair {
338 let key_type = ValueType::for_field(&value[0]);
339 process_value(
340 &mut value[0],
341 self,
342 &state.enter_index(0, state.inner_attrs(), key_type),
343 )?;
344 }
345
346 Ok(())
347 }
348
349 fn process_string(
350 &mut self,
351 _value: &mut String,
352 _meta: &mut Meta,
353 state: &ProcessingState<'_>,
354 ) -> ProcessingResult where {
355 if state.depth() == 1 && state.path().index() == Some(0) {
356 self.has_string_key = true;
357 }
358
359 Ok(())
360 }
361}
362
363fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
364 for element in array.iter_mut() {
365 let mut visitor = PairListProcessor::default();
366 process_value(element, &mut visitor, ProcessingState::root()).ok();
367 if !visitor.is_pair_array() {
368 return false;
369 }
370 }
371
372 !array.is_empty()
373}
374
375pub fn scrub_graphql(event: &mut Event) {
377 let mut keys: BTreeSet<&str> = BTreeSet::new();
378
379 let mut is_graphql = false;
380
381 if let Some(request) = event.request.value_mut()
383 && let Some(Value::Object(data)) = request.data.value_mut()
384 {
385 if let Some(api_target) = request.api_target.value()
386 && api_target.eq_ignore_ascii_case("graphql")
387 {
388 is_graphql = true;
389 }
390
391 if is_graphql
392 && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
393 {
394 for (key, value) in variables.iter_mut() {
395 keys.insert(key);
396 value.set_value(Some(Value::String("[Filtered]".to_owned())));
397 }
398 }
399 }
400
401 if !is_graphql {
402 return;
403 }
404
405 if let Some(contexts) = event.contexts.value_mut()
407 && let Some(response) = contexts.get_mut::<ResponseContext>()
408 && let Some(Value::Object(data)) = response.data.value_mut()
409 && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
410 {
411 if !keys.is_empty() {
412 scrub_graphql_data(&keys, graphql_data);
413 } else {
414 data.remove("data");
417 }
418 }
419}
420
421fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
423 for (key, value) in data.iter_mut() {
424 match value.value_mut() {
425 Some(Value::Object(item_data)) => {
426 scrub_graphql_data(keys, item_data);
427 }
428 _ => {
429 if keys.contains(key.as_str()) {
430 value.set_value(Some(Value::String("[Filtered]".to_owned())));
431 }
432 }
433 }
434 }
435}
436
437fn apply_rule_to_value(
438 meta: &mut Meta,
439 rule: &RuleRef,
440 key: Option<&str>,
441 mut value: Option<&mut String>,
442) -> ProcessingResult {
443 let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
446
447 if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
450 meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
452 return Err(ProcessingAction::DeleteValueHard);
453 }
454
455 macro_rules! apply_regex {
456 ($regex:expr, $replace_behavior:expr) => {
457 if let Some(ref mut value) = value {
458 processor::process_chunked_value(value, meta, |chunks| {
459 apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
460 });
461 }
462 };
463 }
464
465 for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
466 match pattern_type {
467 PatternType::KeyValue => {
468 if regex.is_match(key.unwrap_or("")) {
469 if value.is_some() && should_redact_chunks {
470 apply_regex!(&ANYTHING_REGEX, replace_behavior);
473 } else {
474 meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
475 return Err(ProcessingAction::DeleteValueHard);
476 }
477 } else {
478 apply_regex!(regex, replace_behavior);
481 }
482 }
483 PatternType::Value => {
484 apply_regex!(regex, replace_behavior);
485 }
486 }
487 }
488
489 Ok(())
490}
491
492fn apply_regex_to_chunks<'a>(
493 chunks: Vec<Chunk<'a>>,
494 rule: &RuleRef,
495 regex: &Regex,
496 replace_behavior: ReplaceBehavior,
497) -> Vec<Chunk<'a>> {
498 let mut search_string = String::new();
503 let mut has_text = false;
504 for chunk in &chunks {
505 match chunk {
506 Chunk::Text { text } => {
507 has_text = true;
508 search_string.push_str(&text.replace('\x00', ""));
509 }
510 Chunk::Redaction { .. } => search_string.push('\x00'),
511 }
512 }
513
514 if !has_text {
515 return chunks;
517 }
518
519 let mut captures_iter = regex.captures_iter(&search_string).peekable();
521 if captures_iter.peek().is_none() {
522 return chunks;
523 }
524
525 let mut replacement_chunks = vec![];
526 for chunk in chunks {
527 if let Chunk::Redaction { .. } = chunk {
528 replacement_chunks.push(chunk);
529 }
530 }
531 replacement_chunks.reverse();
532
533 fn process_text<'a>(
534 text: &str,
535 rv: &mut Vec<Chunk<'a>>,
536 replacement_chunks: &mut Vec<Chunk<'a>>,
537 ) {
538 if text.is_empty() {
539 return;
540 }
541
542 static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
543 let regex = NULL_SPLIT_RE.get_or_init(|| {
544 #[allow(clippy::trivial_regex)]
545 Regex::new("\x00").unwrap()
546 });
547
548 let mut pos = 0;
549 for piece in regex.find_iter(text) {
550 rv.push(Chunk::Text {
551 text: Cow::Owned(text[pos..piece.start()].to_string()),
552 });
553 rv.push(replacement_chunks.pop().unwrap());
554 pos = piece.end();
555 }
556
557 rv.push(Chunk::Text {
558 text: Cow::Owned(text[pos..].to_string()),
559 });
560 }
561
562 let mut pos = 0;
563 let mut rv = Vec::with_capacity(replacement_chunks.len());
564
565 match replace_behavior {
566 ReplaceBehavior::Groups(ref groups) => {
567 for m in captures_iter {
568 for (idx, g) in m.iter().enumerate() {
569 if let Some(g) = g
570 && groups.contains(&(idx as u8))
571 {
572 process_text(
573 &search_string[pos..g.start()],
574 &mut rv,
575 &mut replacement_chunks,
576 );
577 insert_replacement_chunks(rule, g.as_str(), &mut rv);
578 pos = g.end();
579 }
580 }
581 }
582 process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
583 debug_assert!(replacement_chunks.is_empty());
584 }
585 ReplaceBehavior::Value => {
586 insert_replacement_chunks(rule, &search_string, &mut rv);
590 }
591 }
592 rv
593}
594
595fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
596 match &rule.redaction {
597 Redaction::Default | Redaction::Remove => {
598 output.push(Chunk::Redaction {
599 text: Cow::Borrowed(""),
600 rule_id: Cow::Owned(rule.origin.to_string()),
601 ty: RemarkType::Removed,
602 });
603 }
604 Redaction::Mask => {
605 let buf = vec!['*'; text.chars().count()];
606
607 output.push(Chunk::Redaction {
608 ty: RemarkType::Masked,
609 rule_id: Cow::Owned(rule.origin.to_string()),
610 text: buf.into_iter().collect(),
611 })
612 }
613 Redaction::Hash => {
614 output.push(Chunk::Redaction {
615 ty: RemarkType::Pseudonymized,
616 rule_id: Cow::Owned(rule.origin.to_string()),
617 text: Cow::Owned(utils::hash_value(text.as_bytes())),
618 });
619 }
620 Redaction::Replace(replace) => {
621 output.push(Chunk::Redaction {
622 ty: RemarkType::Substituted,
623 rule_id: Cow::Owned(rule.origin.to_string()),
624 text: Cow::Owned(replace.text.clone()),
625 });
626 }
627 Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
628 }
629}
630
631#[cfg(test)]
632mod tests {
633 use insta::{allow_duplicates, assert_debug_snapshot};
634 use relay_event_schema::processor::process_value;
635 use relay_event_schema::protocol::{
636 Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
637 NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
638 };
639 use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
640 use serde_json::json;
641
642 use super::*;
643 use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
644
645 fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
646 use crate::convert::to_pii_config as to_pii_config_impl;
647 let rv = to_pii_config_impl(datascrubbing_config).unwrap();
648 if let Some(ref config) = rv {
649 let roundtrip: PiiConfig =
650 serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
651 assert_eq!(&roundtrip, config);
652 }
653 rv
654 }
655
656 #[test]
657 fn test_scrub_original_value() {
658 let mut data = Event::from_value(
659 json!({
660 "user": {
661 "username": "hey man 73.133.27.120", "ip_address": "is this an ip address? 73.133.27.120", },
664 "hpkp":"invalid data my ip address is 74.133.27.120 and my credit card number is 4571234567890111 ",
665 })
666 .into(),
667 );
668
669 let scrubbing_config = DataScrubbingConfig {
670 scrub_data: true,
671 scrub_ip_addresses: true,
672 scrub_defaults: true,
673 ..Default::default()
674 };
675
676 let pii_config = to_pii_config(&scrubbing_config).unwrap();
677 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
678
679 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
680
681 assert_debug_snapshot!(&data);
682 }
683
684 #[test]
685 fn test_sentry_user() {
686 let mut data = Event::from_value(
687 json!({
688 "user": {
689 "ip_address": "73.133.27.120",
690 "sentry_user": "ip:73.133.27.120",
691 },
692 })
693 .into(),
694 );
695
696 let scrubbing_config = DataScrubbingConfig {
697 scrub_data: true,
698 scrub_ip_addresses: true,
699 scrub_defaults: true,
700 ..Default::default()
701 };
702
703 let pii_config = to_pii_config(&scrubbing_config).unwrap();
704 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
705
706 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
707
708 assert_debug_snapshot!(&data);
709 }
710
711 #[test]
712 fn test_basic_stripping() {
713 let config = serde_json::from_str::<PiiConfig>(
714 r#"
715 {
716 "rules": {
717 "remove_bad_headers": {
718 "type": "redact_pair",
719 "keyPattern": "(?i)cookie|secret[-_]?key"
720 }
721 },
722 "applications": {
723 "$string": ["@ip"],
724 "$object.**": ["remove_bad_headers"]
725 }
726 }
727 "#,
728 )
729 .unwrap();
730
731 let mut event = Annotated::new(Event {
732 logentry: Annotated::new(LogEntry {
733 formatted: Annotated::new("Hello world!".to_owned().into()),
734 ..Default::default()
735 }),
736 request: Annotated::new(Request {
737 env: {
738 let mut rv = Object::new();
739 rv.insert(
740 "SECRET_KEY".to_owned(),
741 Annotated::new(Value::String("134141231231231231231312".into())),
742 );
743 Annotated::new(rv)
744 },
745 headers: {
746 let rv = vec![
747 Annotated::new((
748 Annotated::new("Cookie".to_owned().into()),
749 Annotated::new("super secret".to_owned().into()),
750 )),
751 Annotated::new((
752 Annotated::new("X-Forwarded-For".to_owned().into()),
753 Annotated::new("127.0.0.1".to_owned().into()),
754 )),
755 ];
756 Annotated::new(Headers(PairList(rv)))
757 },
758 ..Default::default()
759 }),
760 tags: Annotated::new(Tags(
761 vec![Annotated::new(TagEntry(
762 Annotated::new("forwarded_for".to_owned()),
763 Annotated::new("127.0.0.1".to_owned()),
764 ))]
765 .into(),
766 )),
767 ..Default::default()
768 });
769
770 let mut processor = PiiProcessor::new(config.compiled());
771 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
772 assert_annotated_snapshot!(event);
773 }
774
775 #[test]
776 fn test_redact_containers() {
777 let config = serde_json::from_str::<PiiConfig>(
778 r#"
779 {
780 "applications": {
781 "$object": ["@anything"]
782 }
783 }
784 "#,
785 )
786 .unwrap();
787
788 let mut event = Annotated::new(Event {
789 extra: {
790 let mut map = Object::new();
791 map.insert(
792 "foo".to_owned(),
793 Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
794 );
795 Annotated::new(map)
796 },
797 ..Default::default()
798 });
799
800 let mut processor = PiiProcessor::new(config.compiled());
801 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
802 assert_annotated_snapshot!(event);
803 }
804
805 #[test]
806 fn test_redact_custom_pattern() {
807 let config = serde_json::from_str::<PiiConfig>(
808 r#"
809 {
810 "applications": {
811 "$string": ["myrule"]
812 },
813 "rules": {
814 "myrule": {
815 "type": "pattern",
816 "pattern": "foo",
817 "redaction": {
818 "method": "replace",
819 "text": "asd"
820 }
821 }
822 }
823 }
824 "#,
825 )
826 .unwrap();
827
828 let mut event = Annotated::new(Event {
829 extra: {
830 let mut map = Object::new();
831 map.insert(
832 "myvalue".to_owned(),
833 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
834 );
835 Annotated::new(map)
836 },
837 ..Default::default()
838 });
839
840 let mut processor = PiiProcessor::new(config.compiled());
841 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
842 assert_annotated_snapshot!(event);
843 }
844
845 #[test]
846 fn test_redact_custom_negative_pattern() {
847 let config = serde_json::from_str::<PiiConfig>(
848 r#"
849 {
850 "applications": {
851 "$string": ["myrule"]
852 },
853 "rules": {
854 "myrule": {
855 "type": "pattern",
856 "pattern": "the good string|.*OK.*|(.*)",
857 "replaceGroups": [1],
858 "redaction": {
859 "method": "mask"
860 }
861 }
862 }
863 }
864 "#,
865 )
866 .unwrap();
867
868 let mut event = Annotated::<Event>::from_json(
869 r#"{
870 "extra": {
871 "1": "the good string",
872 "2": "a bad string",
873 "3": "another OK string",
874 "4": "another bad one"
875 }
876 }"#,
877 )
878 .unwrap();
879
880 let mut processor = PiiProcessor::new(config.compiled());
881 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
882 assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
883 {
884 "1": "the good string",
885 "2": "************",
886 "3": "another OK string",
887 "4": "***************",
888 "_meta": {
889 "2": {
890 "": {
891 "rem": [
892 [
893 "myrule",
894 "m",
895 0,
896 12
897 ]
898 ],
899 "len": 12
900 }
901 },
902 "4": {
903 "": {
904 "rem": [
905 [
906 "myrule",
907 "m",
908 0,
909 15
910 ]
911 ],
912 "len": 15
913 }
914 }
915 }
916 }
917 "#);
918 }
919
920 #[test]
921 fn test_no_field_upsert() {
922 let config = serde_json::from_str::<PiiConfig>(
923 r#"
924 {
925 "applications": {
926 "**": ["@anything:remove"]
927 }
928 }
929 "#,
930 )
931 .unwrap();
932
933 let mut event = Annotated::new(Event {
934 extra: {
935 let mut map = Object::new();
936 map.insert(
937 "myvalue".to_owned(),
938 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
939 );
940 Annotated::new(map)
941 },
942 ..Default::default()
943 });
944
945 let mut processor = PiiProcessor::new(config.compiled());
946 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
947 assert_annotated_snapshot!(event);
948 }
949
950 #[test]
951 fn test_anything_hash_on_string() {
952 let config = serde_json::from_str::<PiiConfig>(
953 r#"
954 {
955 "applications": {
956 "$string": ["@anything:hash"]
957 }
958 }
959 "#,
960 )
961 .unwrap();
962
963 let mut event = Annotated::new(Event {
964 extra: {
965 let mut map = Object::new();
966 map.insert(
967 "myvalue".to_owned(),
968 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
969 );
970 Annotated::new(map)
971 },
972 ..Default::default()
973 });
974
975 let mut processor = PiiProcessor::new(config.compiled());
976 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
977 assert_annotated_snapshot!(event);
978 }
979
980 #[test]
981 fn test_anything_hash_on_container() {
982 let config = serde_json::from_str::<PiiConfig>(
983 r#"
984 {
985 "applications": {
986 "$object": ["@anything:hash"]
987 }
988 }
989 "#,
990 )
991 .unwrap();
992
993 let mut event = Annotated::new(Event {
994 extra: {
995 let mut map = Object::new();
996 map.insert(
997 "myvalue".to_owned(),
998 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
999 );
1000 Annotated::new(map)
1001 },
1002 ..Default::default()
1003 });
1004
1005 let mut processor = PiiProcessor::new(config.compiled());
1006 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1007 assert_annotated_snapshot!(event);
1008 }
1009
1010 #[test]
1011 fn test_ignore_user_agent_ip_scrubbing() {
1012 let mut data = Event::from_value(
1013 json!({
1014 "request": {
1015 "headers": [
1016 ["User-Agent", "127.0.0.1"],
1017 ["X-Client-Ip", "10.0.0.1"]
1018 ]
1019 },
1020 })
1021 .into(),
1022 );
1023
1024 let scrubbing_config = DataScrubbingConfig {
1025 scrub_data: true,
1026 scrub_ip_addresses: true,
1027 scrub_defaults: true,
1028 ..Default::default()
1029 };
1030
1031 let pii_config = to_pii_config(&scrubbing_config).unwrap();
1032 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1033
1034 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1035
1036 assert_annotated_snapshot!(&data);
1037 }
1038
1039 #[test]
1040 fn test_remove_debugmeta_path() {
1041 let config = serde_json::from_str::<PiiConfig>(
1042 r#"
1043 {
1044 "applications": {
1045 "debug_meta.images.*.code_file": ["@anything:remove"],
1046 "debug_meta.images.*.debug_file": ["@anything:remove"]
1047 }
1048 }
1049 "#,
1050 )
1051 .unwrap();
1052
1053 let mut event = Annotated::new(Event {
1054 debug_meta: Annotated::new(DebugMeta {
1055 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1056 NativeDebugImage {
1057 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1058 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1059 debug_id: Annotated::new(
1060 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1061 ),
1062 debug_file: Annotated::new("wntdll.pdb".into()),
1063 debug_checksum: Annotated::empty(),
1064 arch: Annotated::new("arm64".to_owned()),
1065 image_addr: Annotated::new(Addr(0)),
1066 image_size: Annotated::new(4096),
1067 image_vmaddr: Annotated::new(Addr(32768)),
1068 other: {
1069 let mut map = Object::new();
1070 map.insert(
1071 "other".to_owned(),
1072 Annotated::new(Value::String("value".to_owned())),
1073 );
1074 map
1075 },
1076 },
1077 )))]),
1078 ..Default::default()
1079 }),
1080 ..Default::default()
1081 });
1082
1083 let mut processor = PiiProcessor::new(config.compiled());
1084 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1085 assert_annotated_snapshot!(event);
1086 }
1087
1088 #[test]
1089 fn test_replace_debugmeta_path() {
1090 let config = serde_json::from_str::<PiiConfig>(
1091 r#"
1092 {
1093 "applications": {
1094 "debug_meta.images.*.code_file": ["@anything:replace"],
1095 "debug_meta.images.*.debug_file": ["@anything:replace"]
1096 }
1097 }
1098 "#,
1099 )
1100 .unwrap();
1101
1102 let mut event = Annotated::new(Event {
1103 debug_meta: Annotated::new(DebugMeta {
1104 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1105 NativeDebugImage {
1106 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1107 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1108 debug_id: Annotated::new(
1109 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1110 ),
1111 debug_file: Annotated::new("wntdll.pdb".into()),
1112 debug_checksum: Annotated::empty(),
1113 arch: Annotated::new("arm64".to_owned()),
1114 image_addr: Annotated::new(Addr(0)),
1115 image_size: Annotated::new(4096),
1116 image_vmaddr: Annotated::new(Addr(32768)),
1117 other: {
1118 let mut map = Object::new();
1119 map.insert(
1120 "other".to_owned(),
1121 Annotated::new(Value::String("value".to_owned())),
1122 );
1123 map
1124 },
1125 },
1126 )))]),
1127 ..Default::default()
1128 }),
1129 ..Default::default()
1130 });
1131
1132 let mut processor = PiiProcessor::new(config.compiled());
1133 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1134 assert_annotated_snapshot!(event);
1135 }
1136
1137 #[test]
1138 fn test_hash_debugmeta_path() {
1139 let config = serde_json::from_str::<PiiConfig>(
1140 r#"
1141 {
1142 "applications": {
1143 "debug_meta.images.*.code_file": ["@anything:hash"],
1144 "debug_meta.images.*.debug_file": ["@anything:hash"]
1145 }
1146 }
1147 "#,
1148 )
1149 .unwrap();
1150
1151 let mut event = Annotated::new(Event {
1152 debug_meta: Annotated::new(DebugMeta {
1153 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1154 NativeDebugImage {
1155 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1156 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1157 debug_id: Annotated::new(
1158 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1159 ),
1160 debug_file: Annotated::new("wntdll.pdb".into()),
1161 debug_checksum: Annotated::empty(),
1162 arch: Annotated::new("arm64".to_owned()),
1163 image_addr: Annotated::new(Addr(0)),
1164 image_size: Annotated::new(4096),
1165 image_vmaddr: Annotated::new(Addr(32768)),
1166 other: {
1167 let mut map = Object::new();
1168 map.insert(
1169 "other".to_owned(),
1170 Annotated::new(Value::String("value".to_owned())),
1171 );
1172 map
1173 },
1174 },
1175 )))]),
1176 ..Default::default()
1177 }),
1178 ..Default::default()
1179 });
1180
1181 let mut processor = PiiProcessor::new(config.compiled());
1182 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1183 assert_annotated_snapshot!(event);
1184 }
1185
1186 #[test]
1187 fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1188 let config = serde_json::from_str::<PiiConfig>(
1189 r#"
1190 {
1191 "applications": {
1192 "$string": ["@anything:remove"],
1193 "**": ["@anything:remove"],
1194 "debug_meta.**": ["@anything:remove"],
1195 "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1196 }
1197 }
1198 "#,
1199 )
1200 .unwrap();
1201
1202 let mut event = Annotated::new(Event {
1203 debug_meta: Annotated::new(DebugMeta {
1204 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1205 NativeDebugImage {
1206 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1207 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1208 debug_id: Annotated::new(
1209 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1210 ),
1211 debug_file: Annotated::new("wntdll.pdb".into()),
1212 debug_checksum: Annotated::empty(),
1213 arch: Annotated::new("arm64".to_owned()),
1214 image_addr: Annotated::new(Addr(0)),
1215 image_size: Annotated::new(4096),
1216 image_vmaddr: Annotated::new(Addr(32768)),
1217 other: {
1218 let mut map = Object::new();
1219 map.insert(
1220 "other".to_owned(),
1221 Annotated::new(Value::String("value".to_owned())),
1222 );
1223 map
1224 },
1225 },
1226 )))]),
1227 ..Default::default()
1228 }),
1229 ..Default::default()
1230 });
1231
1232 let mut processor = PiiProcessor::new(config.compiled());
1233 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1234 assert_annotated_snapshot!(event);
1235 }
1236
1237 #[test]
1238 fn test_quoted_keys() {
1239 let config = serde_json::from_str::<PiiConfig>(
1240 r#"
1241 {
1242 "applications": {
1243 "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1244 }
1245 }
1246 "#,
1247 )
1248 .unwrap();
1249
1250 let mut event = Annotated::new(Event {
1251 extra: {
1252 let mut map = Object::new();
1253 map.insert(
1254 "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1255 Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1256 );
1257 map.insert(
1258 "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1259 Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1260 );
1261 Annotated::new(map)
1262 },
1263 ..Default::default()
1264 });
1265
1266 let mut processor = PiiProcessor::new(config.compiled());
1267 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1268 assert_annotated_snapshot!(event);
1269 }
1270
1271 #[test]
1272 fn test_logentry_value_types() {
1273 for formatted_selector in &[
1275 "$logentry.formatted",
1276 "$message",
1277 "$logentry.formatted && $message",
1278 "$string",
1279 ] {
1280 let config = serde_json::from_str::<PiiConfig>(&format!(
1281 r##"
1282 {{
1283 "applications": {{
1284 "{formatted_selector}": ["@anything:remove"]
1285 }}
1286 }}
1287 "##
1288 ))
1289 .unwrap();
1290
1291 let mut event = Annotated::new(Event {
1292 logentry: Annotated::new(LogEntry {
1293 formatted: Annotated::new("Hello world!".to_owned().into()),
1294 ..Default::default()
1295 }),
1296 ..Default::default()
1297 });
1298
1299 let mut processor = PiiProcessor::new(config.compiled());
1300 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1301 assert!(
1302 event
1303 .value()
1304 .unwrap()
1305 .logentry
1306 .value()
1307 .unwrap()
1308 .formatted
1309 .value()
1310 .is_none()
1311 );
1312 }
1313 }
1314
1315 #[test]
1316 fn test_logentry_formatted_never_fully_filtered() {
1317 let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1320 scrub_data: true,
1321 scrub_defaults: true,
1322 scrub_ip_addresses: true,
1323 ..Default::default()
1324 })
1325 .unwrap()
1326 .unwrap();
1327
1328 let mut event = Annotated::new(Event {
1329 logentry: Annotated::new(LogEntry {
1330 formatted: Annotated::new(
1331 "User john.doe@company.com failed login with card 4111-1111-1111-1111"
1332 .to_owned()
1333 .into(),
1334 ),
1335 ..Default::default()
1336 }),
1337 ..Default::default()
1338 });
1339
1340 let mut processor = PiiProcessor::new(config.compiled());
1341 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1342 assert_annotated_snapshot!(event, @r#"
1343 {
1344 "logentry": {
1345 "formatted": "User [email] failed login with card [creditcard]"
1346 },
1347 "_meta": {
1348 "logentry": {
1349 "formatted": {
1350 "": {
1351 "rem": [
1352 [
1353 "@email:replace",
1354 "s",
1355 5,
1356 12
1357 ],
1358 [
1359 "@creditcard:replace",
1360 "s",
1361 36,
1362 48
1363 ]
1364 ],
1365 "len": 68
1366 }
1367 }
1368 }
1369 }
1370 }
1371 "#);
1372 }
1373
1374 #[test]
1375 fn test_logentry_formatted_bearer_token_scrubbing() {
1376 let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1378 scrub_data: true,
1379 scrub_defaults: true,
1380 ..Default::default()
1381 })
1382 .unwrap()
1383 .unwrap();
1384
1385 let mut event = Annotated::new(Event {
1386 logentry: Annotated::new(LogEntry {
1387 formatted: Annotated::new(
1388 "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1389 .to_owned()
1390 .into(),
1391 ),
1392 ..Default::default()
1393 }),
1394 ..Default::default()
1395 });
1396
1397 let mut processor = PiiProcessor::new(config.compiled());
1398 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1399 assert_annotated_snapshot!(event, @r#"
1400 {
1401 "logentry": {
1402 "formatted": "API request failed with Bearer [token] and other data"
1403 },
1404 "_meta": {
1405 "logentry": {
1406 "formatted": {
1407 "": {
1408 "rem": [
1409 [
1410 "@bearer:replace",
1411 "s",
1412 24,
1413 38
1414 ]
1415 ],
1416 "len": 63
1417 }
1418 }
1419 }
1420 }
1421 }
1422 "#);
1423 }
1424
1425 #[test]
1426 fn test_logentry_formatted_password_word_not_scrubbed() {
1427 let config = PiiConfig::default();
1428 let mut event = Annotated::new(Event {
1429 logentry: Annotated::new(LogEntry {
1430 formatted: Annotated::new(
1431 "User password is secret123 for authentication"
1432 .to_owned()
1433 .into(),
1434 ),
1435 ..Default::default()
1436 }),
1437 ..Default::default()
1438 });
1439
1440 let mut processor = PiiProcessor::new(config.compiled());
1441 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1442 assert_annotated_snapshot!(event, @r#"
1443 {
1444 "logentry": {
1445 "formatted": "User password is secret123 for authentication"
1446 }
1447 }
1448 "#);
1449 }
1450
1451 #[test]
1452 fn test_ip_address_hashing() {
1453 let config = serde_json::from_str::<PiiConfig>(
1454 r#"
1455 {
1456 "applications": {
1457 "$user.ip_address": ["@ip:hash"]
1458 }
1459 }
1460 "#,
1461 )
1462 .unwrap();
1463
1464 let mut event = Annotated::new(Event {
1465 user: Annotated::new(User {
1466 ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1467 ..Default::default()
1468 }),
1469 ..Default::default()
1470 });
1471
1472 let mut processor = PiiProcessor::new(config.compiled());
1473 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1474
1475 let user = event.value().unwrap().user.value().unwrap();
1476
1477 assert!(user.ip_address.value().is_none());
1478
1479 assert_eq!(
1480 user.id.value().unwrap().as_str(),
1481 "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1482 );
1483 }
1484
1485 #[test]
1486 fn test_ip_address_hashing_does_not_overwrite_id() {
1487 let config = serde_json::from_str::<PiiConfig>(
1488 r#"
1489 {
1490 "applications": {
1491 "$user.ip_address": ["@ip:hash"]
1492 }
1493 }
1494 "#,
1495 )
1496 .unwrap();
1497
1498 let mut event = Annotated::new(Event {
1499 user: Annotated::new(User {
1500 id: Annotated::new("123".to_owned().into()),
1501 ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1502 ..Default::default()
1503 }),
1504 ..Default::default()
1505 });
1506
1507 let mut processor = PiiProcessor::new(config.compiled());
1508 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1509
1510 let user = event.value().unwrap().user.value().unwrap();
1511
1512 assert_eq!(
1514 user.ip_address.value().unwrap().as_str(),
1515 "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1516 );
1517
1518 assert_eq!(user.id.value().unwrap().as_str(), "123");
1519 }
1520
1521 #[test]
1522 fn test_replace_replaced_text() {
1523 let chunks = vec![Chunk::Redaction {
1524 text: "[ip]".into(),
1525 rule_id: "@ip".into(),
1526 ty: RemarkType::Substituted,
1527 }];
1528 let rule = RuleRef {
1529 id: "@ip:replace".into(),
1530 origin: "@ip".into(),
1531 ty: RuleType::Ip,
1532 redaction: Redaction::Replace(ReplaceRedaction {
1533 text: "[ip]".into(),
1534 }),
1535 };
1536 let res = apply_regex_to_chunks(
1537 chunks.clone(),
1538 &rule,
1539 &Regex::new(r#".*"#).unwrap(),
1540 ReplaceBehavior::Value,
1541 );
1542 assert_eq!(chunks, res);
1543 }
1544
1545 #[test]
1546 fn test_replace_replaced_text_anything() {
1547 let chunks = vec![Chunk::Redaction {
1548 text: "[Filtered]".into(),
1549 rule_id: "@password:filter".into(),
1550 ty: RemarkType::Substituted,
1551 }];
1552 let rule = RuleRef {
1553 id: "@anything:filter".into(),
1554 origin: "@anything:filter".into(),
1555 ty: RuleType::Anything,
1556 redaction: Redaction::Replace(ReplaceRedaction {
1557 text: "[Filtered]".into(),
1558 }),
1559 };
1560 let res = apply_regex_to_chunks(
1561 chunks.clone(),
1562 &rule,
1563 &Regex::new(r#".*"#).unwrap(),
1564 ReplaceBehavior::Groups(smallvec::smallvec![0]),
1565 );
1566 assert_eq!(chunks, res);
1567 }
1568
1569 #[test]
1570 fn test_trace_route_params_scrubbed() {
1571 let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1572 r#"
1573 {
1574 "type": "trace",
1575 "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1576 "span_id": "fa90fdead5f74052",
1577 "data": {
1578 "previousRoute": {
1579 "params": {
1580 "password": "test"
1581 }
1582 }
1583 }
1584 }
1585 "#,
1586 )
1587 .unwrap();
1588
1589 let ds_config = DataScrubbingConfig {
1590 scrub_data: true,
1591 scrub_defaults: true,
1592 ..Default::default()
1593 };
1594 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1595 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1596
1597 process_value(
1598 &mut trace_context,
1599 &mut pii_processor,
1600 ProcessingState::root(),
1601 )
1602 .unwrap();
1603 assert_annotated_snapshot!(trace_context);
1604 }
1605
1606 #[test]
1607 fn test_scrub_span_data_http_not_scrubbed() {
1608 let mut span: Annotated<Span> = Annotated::from_json(
1609 r#"{
1610 "data": {
1611 "http": {
1612 "query": "dance=true"
1613 }
1614 }
1615 }"#,
1616 )
1617 .unwrap();
1618
1619 let ds_config = DataScrubbingConfig {
1620 scrub_data: true,
1621 scrub_defaults: true,
1622 ..Default::default()
1623 };
1624 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1625 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1626
1627 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1628 assert_annotated_snapshot!(span);
1629 }
1630
1631 #[test]
1632 fn test_scrub_span_data_http_strings_are_scrubbed() {
1633 let mut span: Annotated<Span> = Annotated::from_json(
1634 r#"{
1635 "data": {
1636 "http": {
1637 "query": "ccnumber=5105105105105100&process_id=123",
1638 "fragment": "ccnumber=5105105105105100,process_id=123"
1639 }
1640 }
1641 }"#,
1642 )
1643 .unwrap();
1644
1645 let ds_config = DataScrubbingConfig {
1646 scrub_data: true,
1647 scrub_defaults: true,
1648 ..Default::default()
1649 };
1650 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1651 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1652
1653 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1654 assert_annotated_snapshot!(span);
1655 }
1656
1657 #[test]
1658 fn test_scrub_span_data_http_objects_are_scrubbed() {
1659 let mut span: Annotated<Span> = Annotated::from_json(
1660 r#"{
1661 "data": {
1662 "http": {
1663 "query": {
1664 "ccnumber": "5105105105105100",
1665 "process_id": "123"
1666 },
1667 "fragment": {
1668 "ccnumber": "5105105105105100",
1669 "process_id": "123"
1670 }
1671 }
1672 }
1673 }"#,
1674 )
1675 .unwrap();
1676
1677 let ds_config = DataScrubbingConfig {
1678 scrub_data: true,
1679 scrub_defaults: true,
1680 ..Default::default()
1681 };
1682 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1683 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1684
1685 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1686 assert_annotated_snapshot!(span);
1687 }
1688
1689 #[test]
1690 fn test_scrub_span_data_untyped_props_are_scrubbed() {
1691 let mut span: Annotated<Span> = Annotated::from_json(
1692 r#"{
1693 "data": {
1694 "untyped": "ccnumber=5105105105105100",
1695 "more_untyped": {
1696 "typed": "no",
1697 "scrubbed": "yes",
1698 "ccnumber": "5105105105105100"
1699 }
1700 }
1701 }"#,
1702 )
1703 .unwrap();
1704
1705 let ds_config = DataScrubbingConfig {
1706 scrub_data: true,
1707 scrub_defaults: true,
1708 ..Default::default()
1709 };
1710 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1711 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1712
1713 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1714 assert_annotated_snapshot!(span);
1715 }
1716
1717 #[test]
1718 fn test_span_data_pii() {
1719 let mut span = Span::from_value(
1720 json!({
1721 "data": {
1722 "code.filepath": "src/sentry/api/authentication.py",
1723 }
1724 })
1725 .into(),
1726 );
1727
1728 let ds_config = DataScrubbingConfig {
1729 scrub_data: true,
1730 scrub_defaults: true,
1731 ..Default::default()
1732 };
1733 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1734
1735 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1736 processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1737 assert_eq!(
1738 get_value!(span.data.code_filepath!).as_str(),
1739 Some("src/sentry/api/authentication.py")
1740 );
1741 }
1742
1743 #[test]
1744 fn test_csp_source_file_pii() {
1745 let mut event = Event::from_value(
1746 json!({
1747 "csp": {
1748 "source_file": "authentication.js",
1749 }
1750 })
1751 .into(),
1752 );
1753
1754 let config = serde_json::from_str::<PiiConfig>(
1755 r#"
1756 {
1757 "applications": {
1758 "csp.source_file": ["@anything:filter"]
1759 }
1760 }
1761 "#,
1762 )
1763 .unwrap();
1764
1765 let mut pii_processor = PiiProcessor::new(config.compiled());
1766 processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1767 assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1768 }
1769
1770 #[test]
1771 fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1772 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1773 r#"{
1774 "data": {
1775 "http": {
1776 "query": "dance=true"
1777 }
1778 }
1779 }"#,
1780 )
1781 .unwrap();
1782
1783 let ds_config = DataScrubbingConfig {
1784 scrub_data: true,
1785 scrub_defaults: true,
1786 ..Default::default()
1787 };
1788 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1789 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1790 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1791 assert_annotated_snapshot!(breadcrumb);
1792 }
1793
1794 #[test]
1795 fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1796 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1797 r#"{
1798 "data": {
1799 "http": {
1800 "query": "ccnumber=5105105105105100&process_id=123",
1801 "fragment": "ccnumber=5105105105105100,process_id=123"
1802 }
1803 }
1804 }"#,
1805 )
1806 .unwrap();
1807
1808 let ds_config = DataScrubbingConfig {
1809 scrub_data: true,
1810 scrub_defaults: true,
1811 ..Default::default()
1812 };
1813 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1814 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1815 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1816 assert_annotated_snapshot!(breadcrumb);
1817 }
1818
1819 #[test]
1820 fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1821 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1822 r#"{
1823 "data": {
1824 "http": {
1825 "query": {
1826 "ccnumber": "5105105105105100",
1827 "process_id": "123"
1828 },
1829 "fragment": {
1830 "ccnumber": "5105105105105100",
1831 "process_id": "123"
1832 }
1833 }
1834 }
1835 }"#,
1836 )
1837 .unwrap();
1838
1839 let ds_config = DataScrubbingConfig {
1840 scrub_data: true,
1841 scrub_defaults: true,
1842 ..Default::default()
1843 };
1844 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1845 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1846
1847 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1848 assert_annotated_snapshot!(breadcrumb);
1849 }
1850
1851 #[test]
1852 fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1853 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1854 r#"{
1855 "data": {
1856 "untyped": "ccnumber=5105105105105100",
1857 "more_untyped": {
1858 "typed": "no",
1859 "scrubbed": "yes",
1860 "ccnumber": "5105105105105100"
1861 }
1862 }
1863 }"#,
1864 )
1865 .unwrap();
1866
1867 let ds_config = DataScrubbingConfig {
1868 scrub_data: true,
1869 scrub_defaults: true,
1870 ..Default::default()
1871 };
1872 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1873 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1874 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1875 assert_annotated_snapshot!(breadcrumb);
1876 }
1877
1878 #[test]
1879 fn test_scrub_graphql_response_data_with_variables() {
1880 let mut data = Event::from_value(
1881 json!({
1882 "request": {
1883 "data": {
1884 "query": "{\n viewer {\n login\n }\n}",
1885 "variables": {
1886 "login": "foo"
1887 }
1888 },
1889 "api_target": "graphql"
1890 },
1891 "contexts": {
1892 "response": {
1893 "type": "response",
1894 "data": {
1895 "data": {
1896 "viewer": {
1897 "login": "foo"
1898 }
1899 }
1900 }
1901 }
1902 }
1903 })
1904 .into(),
1905 );
1906
1907 scrub_graphql(data.value_mut().as_mut().unwrap());
1908
1909 assert_debug_snapshot!(&data);
1910 }
1911
1912 #[test]
1913 fn test_scrub_graphql_response_data_without_variables() {
1914 let mut data = Event::from_value(
1915 json!({
1916 "request": {
1917 "data": {
1918 "query": "{\n viewer {\n login\n }\n}"
1919 },
1920 "api_target": "graphql"
1921 },
1922 "contexts": {
1923 "response": {
1924 "type": "response",
1925 "data": {
1926 "data": {
1927 "viewer": {
1928 "login": "foo"
1929 }
1930 }
1931 }
1932 }
1933 }
1934 })
1935 .into(),
1936 );
1937
1938 scrub_graphql(data.value_mut().as_mut().unwrap());
1939 assert_debug_snapshot!(&data);
1940 }
1941
1942 #[test]
1943 fn test_does_not_scrub_if_no_graphql() {
1944 let mut data = Event::from_value(
1945 json!({
1946 "request": {
1947 "data": {
1948 "query": "{\n viewer {\n login\n }\n}",
1949 "variables": {
1950 "login": "foo"
1951 }
1952 },
1953 },
1954 "contexts": {
1955 "response": {
1956 "type": "response",
1957 "data": {
1958 "data": {
1959 "viewer": {
1960 "login": "foo"
1961 }
1962 }
1963 }
1964 }
1965 }
1966 })
1967 .into(),
1968 );
1969
1970 let scrubbing_config = DataScrubbingConfig {
1971 scrub_data: true,
1972 scrub_ip_addresses: true,
1973 scrub_defaults: true,
1974 ..Default::default()
1975 };
1976
1977 let pii_config = to_pii_config(&scrubbing_config).unwrap();
1978 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1979
1980 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1981
1982 assert_debug_snapshot!(&data);
1983 }
1984
1985 #[test]
1986 fn test_logentry_params_scrubbed() {
1987 let config = serde_json::from_str::<PiiConfig>(
1988 r##"
1989 {
1990 "applications": {
1991 "$string": ["@anything:remove"]
1992 }
1993 }
1994 "##,
1995 )
1996 .unwrap();
1997
1998 let mut event = Annotated::new(Event {
1999 logentry: Annotated::new(LogEntry {
2000 message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
2001 formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
2002 params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
2003 "12345".to_owned(),
2004 ))])),
2005 ..Default::default()
2006 }),
2007 ..Default::default()
2008 });
2009
2010 let mut processor = PiiProcessor::new(config.compiled());
2011 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2012
2013 let params = get_value!(event.logentry.params!);
2014 assert_debug_snapshot!(params, @r###"
2015 Array(
2016 [
2017 Meta {
2018 remarks: [
2019 Remark {
2020 ty: Removed,
2021 rule_id: "@anything:remove",
2022 range: None,
2023 },
2024 ],
2025 errors: [],
2026 original_length: None,
2027 original_value: None,
2028 },
2029 ],
2030 )
2031 "###);
2032 }
2033
2034 #[test]
2035 fn test_is_pairlist() {
2036 for (case, expected) in [
2037 (r#"[]"#, false),
2038 (r#"["foo"]"#, false),
2039 (r#"["foo", 123]"#, false),
2040 (r#"[[1, "foo"]]"#, false),
2041 (r#"[[["too_nested", 123]]]"#, false),
2042 (r#"[["foo", "bar"], [1, "foo"]]"#, false),
2043 (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
2044 (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
2045 (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
2046 (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
2047 (r#"[["foo", 123]]"#, true),
2048 (r#"[["foo", "bar"]]"#, true),
2049 (
2050 r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
2051 true,
2052 ),
2053 ] {
2054 let v = Annotated::<Value>::from_json(case).unwrap();
2055 let Annotated(Some(Value::Array(mut a)), _) = v else {
2056 panic!()
2057 };
2058 assert_eq!(is_pairlist(&mut a), expected, "{case}");
2059 }
2060 }
2061
    /// Applies two path-selector configs to a frame variable holding a `[key, value]` array
    /// (`headers: [["authorization", "Bearer abc123"]]`) and checks after each one that the
    /// value element is replaced by `[Filtered]` while the key element is left alone.
    #[test]
    fn test_tuple_array_scrubbed_with_path_selector() {
        let configs = vec![
            // Selector whose last segment is the pair's key (`headers.authorization`).
            r##"
                {
                    "applications": {
                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
                    }
                }
                "##,
            // Selector whose last segments are array indices (`headers.0.1`).
            r##"
                {
                    "applications": {
                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
                    }
                }
                "##,
        ];

        // NOTE(review): the event is built once and reused by every iteration below, so the
        // second config runs against data the first config has already scrubbed. Confirm whether
        // each config is meant to be checked against a fresh event.
        let mut event = Event::from_value(
            serde_json::json!(
            {
              "message": "hi",
              "exception": {
                "values": [
                  {
                    "type": "BrokenException",
                    "value": "Something failed",
                    "stacktrace": {
                      "frames": [
                        {
                            "vars": {
                                "headers": [
                                    ["authorization", "Bearer abc123"]
                                ]
                            }
                        }
                      ]
                    }
                  }
                ]
              }
            })
            .into(),
        );

        for config in configs {
            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
            let mut processor = PiiProcessor::new(config.compiled());
            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();

            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();

            // The same inline snapshot is asserted once per config, hence `allow_duplicates!`.
            allow_duplicates!(assert_debug_snapshot!(vars, @r###"
            FrameVars(
                {
                    "headers": Array(
                        [
                            Array(
                                [
                                    String(
                                        "authorization",
                                    ),
                                    Annotated(
                                        String(
                                            "[Filtered]",
                                        ),
                                        Meta {
                                            remarks: [
                                                Remark {
                                                    ty: Substituted,
                                                    rule_id: "@anything:replace",
                                                    range: Some(
                                                        (
                                                            0,
                                                            10,
                                                        ),
                                                    ),
                                                },
                                            ],
                                            errors: [],
                                            original_length: Some(
                                                13,
                                            ),
                                            original_value: None,
                                        },
                                    ),
                                ],
                            ),
                        ],
                    ),
                },
            )
            "###));
        }
    }
2162
2163 #[test]
2164 fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2165 let config = serde_json::from_str::<PiiConfig>(
2166 r##"
2167 {
2168 "applications": {
2169 "$string": ["@password:remove"]
2170 }
2171 }
2172 "##,
2173 )
2174 .unwrap();
2175
2176 let mut event = Event::from_value(
2177 serde_json::json!(
2178 {
2179 "message": "hi",
2180 "exception": {
2181 "values": [
2182 {
2183 "type": "BrokenException",
2184 "value": "Something failed",
2185 "stacktrace": {
2186 "frames": [
2187 {
2188 "vars": {
2189 "headers": [
2190 ["authorization", "abc123"]
2191 ]
2192 }
2193 }
2194 ]
2195 }
2196 }
2197 ]
2198 }
2199 })
2200 .into(),
2201 );
2202
2203 let mut processor = PiiProcessor::new(config.compiled());
2204 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2205
2206 let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2207
2208 assert_debug_snapshot!(vars, @r###"
2209 FrameVars(
2210 {
2211 "headers": Array(
2212 [
2213 Array(
2214 [
2215 String(
2216 "authorization",
2217 ),
2218 Meta {
2219 remarks: [
2220 Remark {
2221 ty: Removed,
2222 rule_id: "@password:remove",
2223 range: None,
2224 },
2225 ],
2226 errors: [],
2227 original_length: None,
2228 original_value: None,
2229 },
2230 ],
2231 ),
2232 ],
2233 ),
2234 },
2235 )
2236 "###);
2237 }
2238}