1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8 self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9 ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12 AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Selects how [`PiiProcessor`] walks an attribute collection in `process_attributes`.
#[derive(Debug, Clone, Copy)]
pub enum AttributeMode {
    /// Process the attributes through their regular child-value traversal.
    Object,
    /// For each attribute, process only its inner value (entered under the attribute's key)
    /// followed by the attribute's `other` fields.
    ValueOnly,
}
32
/// A [`Processor`] that applies the rules of a compiled PII config to the values it visits.
pub struct PiiProcessor<'a> {
    /// How attribute collections are traversed; see [`AttributeMode`].
    attribute_mode: AttributeMode,
    /// Compiled config whose `applications` (selector and rule pairs) are evaluated per value.
    compiled_config: &'a CompiledPiiConfig,
}
39
40impl<'a> PiiProcessor<'a> {
41 pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43 PiiProcessor {
46 compiled_config,
47 attribute_mode: AttributeMode::Object,
48 }
49 }
50
51 pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53 self.attribute_mode = attribute_mode;
54 self
55 }
56
57 fn apply_all_rules(
58 &self,
59 meta: &mut Meta,
60 state: &ProcessingState<'_>,
61 mut value: Option<&mut String>,
62 ) -> ProcessingResult {
63 let pii = state.pii();
64 if pii == Pii::False {
65 return Ok(());
66 }
67
68 for (selector, rules) in self.compiled_config.applications.iter() {
69 if selector.matches_path(&state.path()) {
70 #[allow(clippy::needless_option_as_deref)]
71 for rule in rules {
72 let reborrowed_value = value.as_deref_mut();
73 apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74 }
75 }
76 }
77
78 Ok(())
79 }
80}
81
82impl Processor for PiiProcessor<'_> {
83 fn before_process<T: ProcessValue>(
84 &mut self,
85 value: Option<&T>,
86 meta: &mut Meta,
87 state: &ProcessingState<'_>,
88 ) -> ProcessingResult {
89 if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90 if let Some(parent) = state.iter().next() {
94 let path = state.path();
95 let new_state = parent.enter_borrowed(
96 path.key().unwrap_or(""),
97 Some(Cow::Borrowed(state.attrs())),
98 enum_set!(ValueType::String),
99 );
100
101 if self
102 .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103 .is_err()
104 {
105 meta.set_original_value(Option::<String>::None);
107 }
108 }
109 }
110
111 if state.value_type().contains(ValueType::Boolean)
113 || state.value_type().contains(ValueType::String)
114 {
115 return Ok(());
116 }
117
118 if value.is_none() {
119 return Ok(());
120 }
121
122 self.apply_all_rules(meta, state, None)
124 }
125
126 fn process_array<T>(
127 &mut self,
128 array: &mut Array<T>,
129 _meta: &mut Meta,
130 state: &ProcessingState<'_>,
131 ) -> ProcessingResult
132 where
133 T: ProcessValue,
134 {
135 if is_pairlist(array) {
136 for annotated in array {
137 let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139 if let Some(Value::Array(pair)) = mapped.value_mut() {
140 let mut value = mem::take(&mut pair[1]);
141 let value_type = ValueType::for_field(&value);
142
143 if let Some(key_name) = &pair[0].as_str() {
144 let key_state =
147 state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148 process_value(&mut value, self, &key_state)?;
151 }
152
153 pair[1] = value;
155 }
156
157 *annotated = T::from_value(mapped);
159 }
160
161 Ok(())
162 } else {
163 array.process_child_values(self, state)
165 }
166 }
167
168 fn process_string(
169 &mut self,
170 value: &mut String,
171 meta: &mut Meta,
172 state: &ProcessingState<'_>,
173 ) -> ProcessingResult {
174 if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175 return Ok(());
176 }
177
178 self.apply_all_rules(meta, state, Some(value))
181 }
182
183 fn process_native_image_path(
184 &mut self,
185 NativeImagePath(value): &mut NativeImagePath,
186 meta: &mut Meta,
187 state: &ProcessingState<'_>,
188 ) -> ProcessingResult {
189 if let Some(index) = value.rfind(['/', '\\']) {
200 let basename = value.split_off(index);
201 match self.process_string(value, meta, state) {
202 Ok(()) => value.push_str(&basename),
203 Err(
204 ProcessingAction::DeleteValueHard
205 | ProcessingAction::DeleteValueWithRemark(_)
206 | ProcessingAction::DeleteValueSoft,
207 ) => {
208 basename[1..].clone_into(value);
209 }
210 Err(ProcessingAction::InvalidTransaction(x)) => {
211 return Err(ProcessingAction::InvalidTransaction(x));
212 }
213 }
214 }
215
216 Ok(())
217 }
218
    /// Processes a pair list by delegating to `utils::process_pairlist` with this processor.
    fn process_pairlist<T: ProcessValue + AsPair>(
        &mut self,
        value: &mut PairList<T>,
        _meta: &mut Meta,
        state: &ProcessingState,
    ) -> ProcessingResult {
        utils::process_pairlist(self, value, state)
    }
227
228 fn process_attributes(
229 &mut self,
230 value: &mut relay_event_schema::protocol::Attributes,
231 _meta: &mut Meta,
232 state: &ProcessingState,
233 ) -> ProcessingResult {
234 match self.attribute_mode {
235 AttributeMode::Object => value.process_child_values(self, state),
237 AttributeMode::ValueOnly => {
239 for (key, attribute) in value.0.iter_mut() {
240 let Some(attribute) = attribute.value_mut() else {
241 continue;
242 };
243
244 let attrs = FieldAttrs::new()
249 .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
250 let inner_value = &mut attribute.value.value;
251 let inner_value_type = ValueType::for_field(inner_value);
252 let entered =
253 state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
254
255 processor::process_value(inner_value, self, &entered)?;
256 self.process_other(&mut attribute.other, state)?;
257 }
258 Ok(())
259 }
260 }
261 }
262
263 fn process_user(
264 &mut self,
265 user: &mut User,
266 _meta: &mut Meta,
267 state: &ProcessingState<'_>,
268 ) -> ProcessingResult {
269 let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
270
271 user.process_child_values(self, state)?;
273
274 let has_other_fields = user.id.value().is_some()
275 || user.username.value().is_some()
276 || user.email.value().is_some();
277
278 let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
279
280 if ip_was_valid && !has_other_fields && !ip_is_still_valid {
290 user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
291 user.ip_address.meta_mut().add_remark(Remark::new(
292 RemarkType::Removed,
293 "pii:ip_address".to_owned(),
294 ));
295 }
296
297 Ok(())
298 }
299
300 fn process_replay(
302 &mut self,
303 replay: &mut Replay,
304 _meta: &mut Meta,
305 state: &ProcessingState<'_>,
306 ) -> ProcessingResult {
307 replay.process_child_values(self, state)?;
308 Ok(())
309 }
310}
311
/// Visitor that records whether a value looks like a `[string_key, value]` pair.
///
/// Both flags start out `false` and are filled in by this type's `Processor` implementation.
#[derive(Default)]
struct PairListProcessor {
    /// Set when the visited root value is an array with exactly two elements.
    is_pair: bool,
    /// Set when the first element of that array is processed as a string.
    has_string_key: bool,
}

impl PairListProcessor {
    /// Returns `true` if the visited value was a two-element array whose first element is a
    /// string.
    fn is_pair_array(&self) -> bool {
        self.is_pair && self.has_string_key
    }
}
325
326impl Processor for PairListProcessor {
327 fn process_array<T>(
328 &mut self,
329 value: &mut Array<T>,
330 _meta: &mut Meta,
331 state: &ProcessingState<'_>,
332 ) -> ProcessingResult
333 where
334 T: ProcessValue,
335 {
336 self.is_pair = state.depth() == 0 && value.len() == 2;
337 if self.is_pair {
338 let key_type = ValueType::for_field(&value[0]);
339 process_value(
340 &mut value[0],
341 self,
342 &state.enter_index(0, state.inner_attrs(), key_type),
343 )?;
344 }
345
346 Ok(())
347 }
348
349 fn process_string(
350 &mut self,
351 _value: &mut String,
352 _meta: &mut Meta,
353 state: &ProcessingState<'_>,
354 ) -> ProcessingResult where {
355 if state.depth() == 1 && state.path().index() == Some(0) {
356 self.has_string_key = true;
357 }
358
359 Ok(())
360 }
361}
362
363fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
364 for element in array.iter_mut() {
365 let mut visitor = PairListProcessor::default();
366 process_value(element, &mut visitor, ProcessingState::root()).ok();
367 if !visitor.is_pair_array() {
368 return false;
369 }
370 }
371
372 !array.is_empty()
373}
374
375pub fn scrub_graphql(event: &mut Event) {
377 let mut keys: BTreeSet<&str> = BTreeSet::new();
378
379 let mut is_graphql = false;
380
381 if let Some(request) = event.request.value_mut()
383 && let Some(Value::Object(data)) = request.data.value_mut()
384 {
385 if let Some(api_target) = request.api_target.value()
386 && api_target.eq_ignore_ascii_case("graphql")
387 {
388 is_graphql = true;
389 }
390
391 if is_graphql
392 && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
393 {
394 for (key, value) in variables.iter_mut() {
395 keys.insert(key);
396 value.set_value(Some(Value::String("[Filtered]".to_owned())));
397 }
398 }
399 }
400
401 if !is_graphql {
402 return;
403 }
404
405 if let Some(contexts) = event.contexts.value_mut()
407 && let Some(response) = contexts.get_mut::<ResponseContext>()
408 && let Some(Value::Object(data)) = response.data.value_mut()
409 && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
410 {
411 if !keys.is_empty() {
412 scrub_graphql_data(&keys, graphql_data);
413 } else {
414 data.remove("data");
417 }
418 }
419}
420
421fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
423 for (key, value) in data.iter_mut() {
424 match value.value_mut() {
425 Some(Value::Object(item_data)) => {
426 scrub_graphql_data(keys, item_data);
427 }
428 _ => {
429 if keys.contains(key.as_str()) {
430 value.set_value(Some(Value::String("[Filtered]".to_owned())));
431 }
432 }
433 }
434 }
435}
436
/// Applies a single rule to a value, identified by its `key` and optional string content.
///
/// Returns `Err(ProcessingAction::DeleteValueHard)` (after adding a removal remark to `meta`)
/// when the whole value has to go; otherwise redacts matching parts of `value` in place and
/// returns `Ok(())`. When `value` is `None`, regex-based redaction is a no-op.
fn apply_rule_to_value(
    meta: &mut Meta,
    rule: &RuleRef,
    key: Option<&str>,
    mut value: Option<&mut String>,
) -> ProcessingResult {
    // The default/remove redactions delete the value as a whole; all other redactions rewrite
    // the string content chunk by chunk.
    let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);

    // An "anything" rule deletes the value outright when there is no string to rewrite or the
    // redaction does not produce replacement text.
    if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
        return Err(ProcessingAction::DeleteValueHard);
    }

    // Runs `apply_regex_to_chunks` over the string value, if there is one. A macro is used so
    // that `value` and `meta` are only borrowed at each expansion site.
    macro_rules! apply_regex {
        ($regex:expr, $replace_behavior:expr) => {
            if let Some(ref mut value) = value {
                processor::process_chunked_value(value, meta, |chunks| {
                    apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
                });
            }
        };
    }

    for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
        match pattern_type {
            PatternType::KeyValue => {
                // Key/value patterns are matched against the key first.
                if regex.is_match(key.unwrap_or("")) {
                    if value.is_some() && should_redact_chunks {
                        // The key matched: redact the entire string content.
                        apply_regex!(&ANYTHING_REGEX, replace_behavior);
                    } else {
                        // Nothing to rewrite, so the value is deleted as a whole.
                        meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
                        return Err(ProcessingAction::DeleteValueHard);
                    }
                } else {
                    // The key did not match: fall back to matching the value's content.
                    apply_regex!(regex, replace_behavior);
                }
            }
            PatternType::Value => {
                apply_regex!(regex, replace_behavior);
            }
        }
    }

    Ok(())
}
491
492fn apply_regex_to_chunks<'a>(
493 chunks: Vec<Chunk<'a>>,
494 rule: &RuleRef,
495 regex: &Regex,
496 replace_behavior: ReplaceBehavior,
497) -> Vec<Chunk<'a>> {
498 let mut search_string = String::new();
503 let mut has_text = false;
504 for chunk in &chunks {
505 match chunk {
506 Chunk::Text { text } => {
507 has_text = true;
508 search_string.push_str(&text.replace('\x00', ""));
509 }
510 Chunk::Redaction { .. } => search_string.push('\x00'),
511 }
512 }
513
514 if !has_text {
515 return chunks;
517 }
518
519 let mut captures_iter = regex.captures_iter(&search_string).peekable();
521 if captures_iter.peek().is_none() {
522 return chunks;
523 }
524
525 let mut replacement_chunks = vec![];
526 for chunk in chunks {
527 if let Chunk::Redaction { .. } = chunk {
528 replacement_chunks.push(chunk);
529 }
530 }
531 replacement_chunks.reverse();
532
533 fn process_text<'a>(
534 text: &str,
535 rv: &mut Vec<Chunk<'a>>,
536 replacement_chunks: &mut Vec<Chunk<'a>>,
537 ) {
538 if text.is_empty() {
539 return;
540 }
541
542 static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
543 let regex = NULL_SPLIT_RE.get_or_init(|| {
544 #[allow(clippy::trivial_regex)]
545 Regex::new("\x00").unwrap()
546 });
547
548 let mut pos = 0;
549 for piece in regex.find_iter(text) {
550 rv.push(Chunk::Text {
551 text: Cow::Owned(text[pos..piece.start()].to_string()),
552 });
553 rv.push(replacement_chunks.pop().unwrap());
554 pos = piece.end();
555 }
556
557 rv.push(Chunk::Text {
558 text: Cow::Owned(text[pos..].to_string()),
559 });
560 }
561
562 let mut pos = 0;
563 let mut rv = Vec::with_capacity(replacement_chunks.len());
564
565 match replace_behavior {
566 ReplaceBehavior::Groups(ref groups) => {
567 for m in captures_iter {
568 for (idx, g) in m.iter().enumerate() {
569 if let Some(g) = g
570 && groups.contains(&(idx as u8))
571 {
572 process_text(
573 &search_string[pos..g.start()],
574 &mut rv,
575 &mut replacement_chunks,
576 );
577 insert_replacement_chunks(rule, g.as_str(), &mut rv);
578 pos = g.end();
579 }
580 }
581 }
582 process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
583 debug_assert!(replacement_chunks.is_empty());
584 }
585 ReplaceBehavior::Value => {
586 insert_replacement_chunks(rule, &search_string, &mut rv);
590 }
591 }
592 rv
593}
594
595fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
596 match &rule.redaction {
597 Redaction::Default | Redaction::Remove => {
598 output.push(Chunk::Redaction {
599 text: Cow::Borrowed(""),
600 rule_id: Cow::Owned(rule.origin.to_string()),
601 ty: RemarkType::Removed,
602 });
603 }
604 Redaction::Mask => {
605 let buf = vec!['*'; text.chars().count()];
606
607 output.push(Chunk::Redaction {
608 ty: RemarkType::Masked,
609 rule_id: Cow::Owned(rule.origin.to_string()),
610 text: buf.into_iter().collect(),
611 })
612 }
613 Redaction::Hash => {
614 output.push(Chunk::Redaction {
615 ty: RemarkType::Pseudonymized,
616 rule_id: Cow::Owned(rule.origin.to_string()),
617 text: Cow::Owned(utils::hash_value(text.as_bytes())),
618 });
619 }
620 Redaction::Replace(replace) => {
621 output.push(Chunk::Redaction {
622 ty: RemarkType::Substituted,
623 rule_id: Cow::Owned(rule.origin.to_string()),
624 text: Cow::Owned(replace.text.clone()),
625 });
626 }
627 Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
628 }
629}
630
631#[cfg(test)]
632mod tests {
633 use insta::{allow_duplicates, assert_debug_snapshot};
634 use relay_event_schema::processor::process_value;
635 use relay_event_schema::protocol::{
636 Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
637 NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
638 };
639 use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
640 use serde_json::json;
641
642 use super::*;
643 use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
644
645 fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
646 use crate::convert::to_pii_config as to_pii_config_impl;
647 let rv = to_pii_config_impl(datascrubbing_config);
648 if let Some(ref config) = rv {
649 let roundtrip: PiiConfig =
650 serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
651 assert_eq!(&roundtrip, config);
652 }
653 rv
654 }
655
656 #[test]
657 fn test_scrub_original_value() {
658 let mut data = Event::from_value(
659 json!({
660 "user": {
661 "username": "hey man 73.133.27.120", "ip_address": "is this an ip address? 73.133.27.120", },
664 "hpkp":"invalid data my ip address is 74.133.27.120 and my credit card number is 4571234567890111 ",
665 })
666 .into(),
667 );
668
669 let scrubbing_config = DataScrubbingConfig {
670 scrub_data: true,
671 scrub_ip_addresses: true,
672 scrub_defaults: true,
673 ..Default::default()
674 };
675
676 let pii_config = to_pii_config(&scrubbing_config).unwrap();
677 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
678
679 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
680
681 assert_debug_snapshot!(&data);
682 }
683
684 #[test]
685 fn test_sentry_user() {
686 let mut data = Event::from_value(
687 json!({
688 "user": {
689 "ip_address": "73.133.27.120",
690 "sentry_user": "ip:73.133.27.120",
691 },
692 })
693 .into(),
694 );
695
696 let scrubbing_config = DataScrubbingConfig {
697 scrub_data: true,
698 scrub_ip_addresses: true,
699 scrub_defaults: true,
700 ..Default::default()
701 };
702
703 let pii_config = to_pii_config(&scrubbing_config).unwrap();
704 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
705
706 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
707
708 assert_debug_snapshot!(&data);
709 }
710
711 #[test]
712 fn test_basic_stripping() {
713 let config = serde_json::from_str::<PiiConfig>(
714 r#"
715 {
716 "rules": {
717 "remove_bad_headers": {
718 "type": "redact_pair",
719 "keyPattern": "(?i)cookie|secret[-_]?key"
720 }
721 },
722 "applications": {
723 "$string": ["@ip"],
724 "$object.**": ["remove_bad_headers"]
725 }
726 }
727 "#,
728 )
729 .unwrap();
730
731 let mut event = Annotated::new(Event {
732 logentry: Annotated::new(LogEntry {
733 formatted: Annotated::new("Hello world!".to_owned().into()),
734 ..Default::default()
735 }),
736 request: Annotated::new(Request {
737 env: {
738 let mut rv = Object::new();
739 rv.insert(
740 "SECRET_KEY".to_owned(),
741 Annotated::new(Value::String("134141231231231231231312".into())),
742 );
743 Annotated::new(rv)
744 },
745 headers: {
746 let rv = vec![
747 Annotated::new((
748 Annotated::new("Cookie".to_owned().into()),
749 Annotated::new("super secret".to_owned().into()),
750 )),
751 Annotated::new((
752 Annotated::new("X-Forwarded-For".to_owned().into()),
753 Annotated::new("127.0.0.1".to_owned().into()),
754 )),
755 ];
756 Annotated::new(Headers(PairList(rv)))
757 },
758 ..Default::default()
759 }),
760 tags: Annotated::new(Tags(
761 vec![Annotated::new(TagEntry(
762 Annotated::new("forwarded_for".to_owned()),
763 Annotated::new("127.0.0.1".to_owned()),
764 ))]
765 .into(),
766 )),
767 ..Default::default()
768 });
769
770 let mut processor = PiiProcessor::new(config.compiled());
771 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
772 assert_annotated_snapshot!(event);
773 }
774
775 #[test]
776 fn test_redact_containers() {
777 let config = serde_json::from_str::<PiiConfig>(
778 r#"
779 {
780 "applications": {
781 "$object": ["@anything"]
782 }
783 }
784 "#,
785 )
786 .unwrap();
787
788 let mut event = Annotated::new(Event {
789 extra: {
790 let mut map = Object::new();
791 map.insert(
792 "foo".to_owned(),
793 Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
794 );
795 Annotated::new(map)
796 },
797 ..Default::default()
798 });
799
800 let mut processor = PiiProcessor::new(config.compiled());
801 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
802 assert_annotated_snapshot!(event);
803 }
804
805 #[test]
806 fn test_redact_custom_pattern() {
807 let config = serde_json::from_str::<PiiConfig>(
808 r#"
809 {
810 "applications": {
811 "$string": ["myrule"]
812 },
813 "rules": {
814 "myrule": {
815 "type": "pattern",
816 "pattern": "foo",
817 "redaction": {
818 "method": "replace",
819 "text": "asd"
820 }
821 }
822 }
823 }
824 "#,
825 )
826 .unwrap();
827
828 let mut event = Annotated::new(Event {
829 extra: {
830 let mut map = Object::new();
831 map.insert(
832 "myvalue".to_owned(),
833 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
834 );
835 Annotated::new(map)
836 },
837 ..Default::default()
838 });
839
840 let mut processor = PiiProcessor::new(config.compiled());
841 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
842 assert_annotated_snapshot!(event);
843 }
844
    /// Checks a "negative" pattern: the alternation lets the allowed strings match without
    /// capturing, so only strings matched by the final capture group (group 1) are masked.
    #[test]
    fn test_redact_custom_negative_pattern() {
        let config = serde_json::from_str::<PiiConfig>(
            r#"
            {
                "applications": {
                    "$string": ["myrule"]
                },
                "rules": {
                    "myrule": {
                        "type": "pattern",
                        "pattern": "the good string|.*OK.*|(.*)",
                        "replaceGroups": [1],
                        "redaction": {
                            "method": "mask"
                        }
                    }
                }
            }
            "#,
        )
        .unwrap();

        let mut event = Annotated::<Event>::from_json(
            r#"{
                "extra": {
                    "1": "the good string",
                    "2": "a bad string",
                    "3": "another OK string",
                    "4": "another bad one"
                }
            }"#,
        )
        .unwrap();

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        // Entries "1" and "3" stay intact; "2" and "4" are masked over their full length.
        assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
        {
          "1": "the good string",
          "2": "************",
          "3": "another OK string",
          "4": "***************",
          "_meta": {
            "2": {
              "": {
                "rem": [
                  [
                    "myrule",
                    "m",
                    0,
                    12
                  ]
                ],
                "len": 12
              }
            },
            "4": {
              "": {
                "rem": [
                  [
                    "myrule",
                    "m",
                    0,
                    15
                  ]
                ],
                "len": 15
              }
            }
          }
        }
        "#);
    }
919
920 #[test]
921 fn test_no_field_upsert() {
922 let config = serde_json::from_str::<PiiConfig>(
923 r#"
924 {
925 "applications": {
926 "**": ["@anything:remove"]
927 }
928 }
929 "#,
930 )
931 .unwrap();
932
933 let mut event = Annotated::new(Event {
934 extra: {
935 let mut map = Object::new();
936 map.insert(
937 "myvalue".to_owned(),
938 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
939 );
940 Annotated::new(map)
941 },
942 ..Default::default()
943 });
944
945 let mut processor = PiiProcessor::new(config.compiled());
946 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
947 assert_annotated_snapshot!(event);
948 }
949
950 #[test]
951 fn test_anything_hash_on_string() {
952 let config = serde_json::from_str::<PiiConfig>(
953 r#"
954 {
955 "applications": {
956 "$string": ["@anything:hash"]
957 }
958 }
959 "#,
960 )
961 .unwrap();
962
963 let mut event = Annotated::new(Event {
964 extra: {
965 let mut map = Object::new();
966 map.insert(
967 "myvalue".to_owned(),
968 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
969 );
970 Annotated::new(map)
971 },
972 ..Default::default()
973 });
974
975 let mut processor = PiiProcessor::new(config.compiled());
976 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
977 assert_annotated_snapshot!(event);
978 }
979
980 #[test]
981 fn test_anything_hash_on_container() {
982 let config = serde_json::from_str::<PiiConfig>(
983 r#"
984 {
985 "applications": {
986 "$object": ["@anything:hash"]
987 }
988 }
989 "#,
990 )
991 .unwrap();
992
993 let mut event = Annotated::new(Event {
994 extra: {
995 let mut map = Object::new();
996 map.insert(
997 "myvalue".to_owned(),
998 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
999 );
1000 Annotated::new(map)
1001 },
1002 ..Default::default()
1003 });
1004
1005 let mut processor = PiiProcessor::new(config.compiled());
1006 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1007 assert_annotated_snapshot!(event);
1008 }
1009
1010 #[test]
1011 fn test_ignore_user_agent_ip_scrubbing() {
1012 let mut data = Event::from_value(
1013 json!({
1014 "request": {
1015 "headers": [
1016 ["User-Agent", "127.0.0.1"],
1017 ["X-Client-Ip", "10.0.0.1"]
1018 ]
1019 },
1020 })
1021 .into(),
1022 );
1023
1024 let scrubbing_config = DataScrubbingConfig {
1025 scrub_data: true,
1026 scrub_ip_addresses: true,
1027 scrub_defaults: true,
1028 ..Default::default()
1029 };
1030
1031 let pii_config = to_pii_config(&scrubbing_config).unwrap();
1032 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1033
1034 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1035
1036 assert_annotated_snapshot!(&data);
1037 }
1038
1039 #[test]
1040 fn test_remove_debugmeta_path() {
1041 let config = serde_json::from_str::<PiiConfig>(
1042 r#"
1043 {
1044 "applications": {
1045 "debug_meta.images.*.code_file": ["@anything:remove"],
1046 "debug_meta.images.*.debug_file": ["@anything:remove"]
1047 }
1048 }
1049 "#,
1050 )
1051 .unwrap();
1052
1053 let mut event = Annotated::new(Event {
1054 debug_meta: Annotated::new(DebugMeta {
1055 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1056 NativeDebugImage {
1057 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1058 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1059 debug_id: Annotated::new(
1060 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1061 ),
1062 debug_file: Annotated::new("wntdll.pdb".into()),
1063 debug_checksum: Annotated::empty(),
1064 arch: Annotated::new("arm64".to_owned()),
1065 image_addr: Annotated::new(Addr(0)),
1066 image_size: Annotated::new(4096),
1067 image_vmaddr: Annotated::new(Addr(32768)),
1068 other: {
1069 let mut map = Object::new();
1070 map.insert(
1071 "other".to_owned(),
1072 Annotated::new(Value::String("value".to_owned())),
1073 );
1074 map
1075 },
1076 },
1077 )))]),
1078 ..Default::default()
1079 }),
1080 ..Default::default()
1081 });
1082
1083 let mut processor = PiiProcessor::new(config.compiled());
1084 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1085 assert_annotated_snapshot!(event);
1086 }
1087
1088 #[test]
1089 fn test_replace_debugmeta_path() {
1090 let config = serde_json::from_str::<PiiConfig>(
1091 r#"
1092 {
1093 "applications": {
1094 "debug_meta.images.*.code_file": ["@anything:replace"],
1095 "debug_meta.images.*.debug_file": ["@anything:replace"]
1096 }
1097 }
1098 "#,
1099 )
1100 .unwrap();
1101
1102 let mut event = Annotated::new(Event {
1103 debug_meta: Annotated::new(DebugMeta {
1104 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1105 NativeDebugImage {
1106 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1107 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1108 debug_id: Annotated::new(
1109 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1110 ),
1111 debug_file: Annotated::new("wntdll.pdb".into()),
1112 debug_checksum: Annotated::empty(),
1113 arch: Annotated::new("arm64".to_owned()),
1114 image_addr: Annotated::new(Addr(0)),
1115 image_size: Annotated::new(4096),
1116 image_vmaddr: Annotated::new(Addr(32768)),
1117 other: {
1118 let mut map = Object::new();
1119 map.insert(
1120 "other".to_owned(),
1121 Annotated::new(Value::String("value".to_owned())),
1122 );
1123 map
1124 },
1125 },
1126 )))]),
1127 ..Default::default()
1128 }),
1129 ..Default::default()
1130 });
1131
1132 let mut processor = PiiProcessor::new(config.compiled());
1133 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1134 assert_annotated_snapshot!(event);
1135 }
1136
1137 #[test]
1138 fn test_hash_debugmeta_path() {
1139 let config = serde_json::from_str::<PiiConfig>(
1140 r#"
1141 {
1142 "applications": {
1143 "debug_meta.images.*.code_file": ["@anything:hash"],
1144 "debug_meta.images.*.debug_file": ["@anything:hash"]
1145 }
1146 }
1147 "#,
1148 )
1149 .unwrap();
1150
1151 let mut event = Annotated::new(Event {
1152 debug_meta: Annotated::new(DebugMeta {
1153 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1154 NativeDebugImage {
1155 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1156 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1157 debug_id: Annotated::new(
1158 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1159 ),
1160 debug_file: Annotated::new("wntdll.pdb".into()),
1161 debug_checksum: Annotated::empty(),
1162 arch: Annotated::new("arm64".to_owned()),
1163 image_addr: Annotated::new(Addr(0)),
1164 image_size: Annotated::new(4096),
1165 image_vmaddr: Annotated::new(Addr(32768)),
1166 other: {
1167 let mut map = Object::new();
1168 map.insert(
1169 "other".to_owned(),
1170 Annotated::new(Value::String("value".to_owned())),
1171 );
1172 map
1173 },
1174 },
1175 )))]),
1176 ..Default::default()
1177 }),
1178 ..Default::default()
1179 });
1180
1181 let mut processor = PiiProcessor::new(config.compiled());
1182 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1183 assert_annotated_snapshot!(event);
1184 }
1185
1186 #[test]
1187 fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1188 let config = serde_json::from_str::<PiiConfig>(
1189 r#"
1190 {
1191 "applications": {
1192 "$string": ["@anything:remove"],
1193 "**": ["@anything:remove"],
1194 "debug_meta.**": ["@anything:remove"],
1195 "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1196 }
1197 }
1198 "#,
1199 )
1200 .unwrap();
1201
1202 let mut event = Annotated::new(Event {
1203 debug_meta: Annotated::new(DebugMeta {
1204 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1205 NativeDebugImage {
1206 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1207 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1208 debug_id: Annotated::new(
1209 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1210 ),
1211 debug_file: Annotated::new("wntdll.pdb".into()),
1212 debug_checksum: Annotated::empty(),
1213 arch: Annotated::new("arm64".to_owned()),
1214 image_addr: Annotated::new(Addr(0)),
1215 image_size: Annotated::new(4096),
1216 image_vmaddr: Annotated::new(Addr(32768)),
1217 other: {
1218 let mut map = Object::new();
1219 map.insert(
1220 "other".to_owned(),
1221 Annotated::new(Value::String("value".to_owned())),
1222 );
1223 map
1224 },
1225 },
1226 )))]),
1227 ..Default::default()
1228 }),
1229 ..Default::default()
1230 });
1231
1232 let mut processor = PiiProcessor::new(config.compiled());
1233 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1234 assert_annotated_snapshot!(event);
1235 }
1236
1237 #[test]
1238 fn test_quoted_keys() {
1239 let config = serde_json::from_str::<PiiConfig>(
1240 r#"
1241 {
1242 "applications": {
1243 "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1244 }
1245 }
1246 "#,
1247 )
1248 .unwrap();
1249
1250 let mut event = Annotated::new(Event {
1251 extra: {
1252 let mut map = Object::new();
1253 map.insert(
1254 "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1255 Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1256 );
1257 map.insert(
1258 "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1259 Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1260 );
1261 Annotated::new(map)
1262 },
1263 ..Default::default()
1264 });
1265
1266 let mut processor = PiiProcessor::new(config.compiled());
1267 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1268 assert_annotated_snapshot!(event);
1269 }
1270
1271 #[test]
1272 fn test_logentry_value_types() {
1273 for formatted_selector in &[
1275 "$logentry.formatted",
1276 "$message",
1277 "$logentry.formatted && $message",
1278 "$string",
1279 ] {
1280 let config = serde_json::from_str::<PiiConfig>(&format!(
1281 r##"
1282 {{
1283 "applications": {{
1284 "{formatted_selector}": ["@anything:remove"]
1285 }}
1286 }}
1287 "##
1288 ))
1289 .unwrap();
1290
1291 let mut event = Annotated::new(Event {
1292 logentry: Annotated::new(LogEntry {
1293 formatted: Annotated::new("Hello world!".to_owned().into()),
1294 ..Default::default()
1295 }),
1296 ..Default::default()
1297 });
1298
1299 let mut processor = PiiProcessor::new(config.compiled());
1300 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1301 assert!(
1302 event
1303 .value()
1304 .unwrap()
1305 .logentry
1306 .value()
1307 .unwrap()
1308 .formatted
1309 .value()
1310 .is_none()
1311 );
1312 }
1313 }
1314
    /// With the default scrubbers enabled, a formatted log message containing an email address
    /// and a credit card number has only those substrings replaced; the rest of the message is
    /// preserved.
    #[test]
    fn test_logentry_formatted_never_fully_filtered() {
        // Converted directly via `crate::convert`, without the round-trip check of the local
        // `to_pii_config` helper.
        let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
            scrub_data: true,
            scrub_defaults: true,
            scrub_ip_addresses: true,
            ..Default::default()
        })
        .unwrap();

        let mut event = Annotated::new(Event {
            logentry: Annotated::new(LogEntry {
                formatted: Annotated::new(
                    "User john.doe@company.com failed login with card 4111-1111-1111-1111"
                        .to_owned()
                        .into(),
                ),
                ..Default::default()
            }),
            ..Default::default()
        });

        let mut processor = PiiProcessor::new(config.compiled());
        process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
        assert_annotated_snapshot!(event, @r#"
        {
          "logentry": {
            "formatted": "User [email] failed login with card [creditcard]"
          },
          "_meta": {
            "logentry": {
              "formatted": {
                "": {
                  "rem": [
                    [
                      "@email:replace",
                      "s",
                      5,
                      12
                    ],
                    [
                      "@creditcard:replace",
                      "s",
                      36,
                      48
                    ]
                  ],
                  "len": 68
                }
              }
            }
          }
        }
        "#);
    }
1372
1373 #[test]
1374 fn test_logentry_formatted_bearer_token_scrubbing() {
1375 let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1377 scrub_data: true,
1378 scrub_defaults: true,
1379 ..Default::default()
1380 })
1381 .unwrap();
1382
1383 let mut event = Annotated::new(Event {
1384 logentry: Annotated::new(LogEntry {
1385 formatted: Annotated::new(
1386 "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1387 .to_owned()
1388 .into(),
1389 ),
1390 ..Default::default()
1391 }),
1392 ..Default::default()
1393 });
1394
1395 let mut processor = PiiProcessor::new(config.compiled());
1396 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1397 assert_annotated_snapshot!(event, @r#"
1398 {
1399 "logentry": {
1400 "formatted": "API request failed with Bearer [token] and other data"
1401 },
1402 "_meta": {
1403 "logentry": {
1404 "formatted": {
1405 "": {
1406 "rem": [
1407 [
1408 "@bearer:replace",
1409 "s",
1410 24,
1411 38
1412 ]
1413 ],
1414 "len": 63
1415 }
1416 }
1417 }
1418 }
1419 }
1420 "#);
1421 }
1422
1423 #[test]
1424 fn test_logentry_formatted_password_word_not_scrubbed() {
1425 let config = PiiConfig::default();
1426 let mut event = Annotated::new(Event {
1427 logentry: Annotated::new(LogEntry {
1428 formatted: Annotated::new(
1429 "User password is secret123 for authentication"
1430 .to_owned()
1431 .into(),
1432 ),
1433 ..Default::default()
1434 }),
1435 ..Default::default()
1436 });
1437
1438 let mut processor = PiiProcessor::new(config.compiled());
1439 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1440 assert_annotated_snapshot!(event, @r#"
1441 {
1442 "logentry": {
1443 "formatted": "User password is secret123 for authentication"
1444 }
1445 }
1446 "#);
1447 }
1448
1449 #[test]
1450 fn test_ip_address_hashing() {
1451 let config = serde_json::from_str::<PiiConfig>(
1452 r#"
1453 {
1454 "applications": {
1455 "$user.ip_address": ["@ip:hash"]
1456 }
1457 }
1458 "#,
1459 )
1460 .unwrap();
1461
1462 let mut event = Annotated::new(Event {
1463 user: Annotated::new(User {
1464 ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1465 ..Default::default()
1466 }),
1467 ..Default::default()
1468 });
1469
1470 let mut processor = PiiProcessor::new(config.compiled());
1471 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1472
1473 let user = event.value().unwrap().user.value().unwrap();
1474
1475 assert!(user.ip_address.value().is_none());
1476
1477 assert_eq!(
1478 user.id.value().unwrap().as_str(),
1479 "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1480 );
1481 }
1482
1483 #[test]
1484 fn test_ip_address_hashing_does_not_overwrite_id() {
1485 let config = serde_json::from_str::<PiiConfig>(
1486 r#"
1487 {
1488 "applications": {
1489 "$user.ip_address": ["@ip:hash"]
1490 }
1491 }
1492 "#,
1493 )
1494 .unwrap();
1495
1496 let mut event = Annotated::new(Event {
1497 user: Annotated::new(User {
1498 id: Annotated::new("123".to_owned().into()),
1499 ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1500 ..Default::default()
1501 }),
1502 ..Default::default()
1503 });
1504
1505 let mut processor = PiiProcessor::new(config.compiled());
1506 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1507
1508 let user = event.value().unwrap().user.value().unwrap();
1509
1510 assert_eq!(
1512 user.ip_address.value().unwrap().as_str(),
1513 "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1514 );
1515
1516 assert_eq!(user.id.value().unwrap().as_str(), "123");
1517 }
1518
1519 #[test]
1520 fn test_replace_replaced_text() {
1521 let chunks = vec![Chunk::Redaction {
1522 text: "[ip]".into(),
1523 rule_id: "@ip".into(),
1524 ty: RemarkType::Substituted,
1525 }];
1526 let rule = RuleRef {
1527 id: "@ip:replace".into(),
1528 origin: "@ip".into(),
1529 ty: RuleType::Ip,
1530 redaction: Redaction::Replace(ReplaceRedaction {
1531 text: "[ip]".into(),
1532 }),
1533 };
1534 let res = apply_regex_to_chunks(
1535 chunks.clone(),
1536 &rule,
1537 &Regex::new(r#".*"#).unwrap(),
1538 ReplaceBehavior::Value,
1539 );
1540 assert_eq!(chunks, res);
1541 }
1542
1543 #[test]
1544 fn test_replace_replaced_text_anything() {
1545 let chunks = vec![Chunk::Redaction {
1546 text: "[Filtered]".into(),
1547 rule_id: "@password:filter".into(),
1548 ty: RemarkType::Substituted,
1549 }];
1550 let rule = RuleRef {
1551 id: "@anything:filter".into(),
1552 origin: "@anything:filter".into(),
1553 ty: RuleType::Anything,
1554 redaction: Redaction::Replace(ReplaceRedaction {
1555 text: "[Filtered]".into(),
1556 }),
1557 };
1558 let res = apply_regex_to_chunks(
1559 chunks.clone(),
1560 &rule,
1561 &Regex::new(r#".*"#).unwrap(),
1562 ReplaceBehavior::Groups(smallvec::smallvec![0]),
1563 );
1564 assert_eq!(chunks, res);
1565 }
1566
1567 #[test]
1568 fn test_trace_route_params_scrubbed() {
1569 let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1570 r#"
1571 {
1572 "type": "trace",
1573 "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1574 "span_id": "fa90fdead5f74052",
1575 "data": {
1576 "previousRoute": {
1577 "params": {
1578 "password": "test"
1579 }
1580 }
1581 }
1582 }
1583 "#,
1584 )
1585 .unwrap();
1586
1587 let ds_config = DataScrubbingConfig {
1588 scrub_data: true,
1589 scrub_defaults: true,
1590 ..Default::default()
1591 };
1592 let pii_config = ds_config.pii_config().as_ref().unwrap();
1593 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1594
1595 process_value(
1596 &mut trace_context,
1597 &mut pii_processor,
1598 ProcessingState::root(),
1599 )
1600 .unwrap();
1601 assert_annotated_snapshot!(trace_context);
1602 }
1603
1604 #[test]
1605 fn test_scrub_span_data_http_not_scrubbed() {
1606 let mut span: Annotated<Span> = Annotated::from_json(
1607 r#"{
1608 "data": {
1609 "http": {
1610 "query": "dance=true"
1611 }
1612 }
1613 }"#,
1614 )
1615 .unwrap();
1616
1617 let ds_config = DataScrubbingConfig {
1618 scrub_data: true,
1619 scrub_defaults: true,
1620 ..Default::default()
1621 };
1622 let pii_config = ds_config.pii_config().as_ref().unwrap();
1623 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1624
1625 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1626 assert_annotated_snapshot!(span);
1627 }
1628
1629 #[test]
1630 fn test_scrub_span_data_http_strings_are_scrubbed() {
1631 let mut span: Annotated<Span> = Annotated::from_json(
1632 r#"{
1633 "data": {
1634 "http": {
1635 "query": "ccnumber=5105105105105100&process_id=123",
1636 "fragment": "ccnumber=5105105105105100,process_id=123"
1637 }
1638 }
1639 }"#,
1640 )
1641 .unwrap();
1642
1643 let ds_config = DataScrubbingConfig {
1644 scrub_data: true,
1645 scrub_defaults: true,
1646 ..Default::default()
1647 };
1648 let pii_config = ds_config.pii_config().as_ref().unwrap();
1649 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1650
1651 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1652 assert_annotated_snapshot!(span);
1653 }
1654
1655 #[test]
1656 fn test_scrub_span_data_http_objects_are_scrubbed() {
1657 let mut span: Annotated<Span> = Annotated::from_json(
1658 r#"{
1659 "data": {
1660 "http": {
1661 "query": {
1662 "ccnumber": "5105105105105100",
1663 "process_id": "123"
1664 },
1665 "fragment": {
1666 "ccnumber": "5105105105105100",
1667 "process_id": "123"
1668 }
1669 }
1670 }
1671 }"#,
1672 )
1673 .unwrap();
1674
1675 let ds_config = DataScrubbingConfig {
1676 scrub_data: true,
1677 scrub_defaults: true,
1678 ..Default::default()
1679 };
1680 let pii_config = ds_config.pii_config().as_ref().unwrap();
1681 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1682
1683 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1684 assert_annotated_snapshot!(span);
1685 }
1686
1687 #[test]
1688 fn test_scrub_span_data_untyped_props_are_scrubbed() {
1689 let mut span: Annotated<Span> = Annotated::from_json(
1690 r#"{
1691 "data": {
1692 "untyped": "ccnumber=5105105105105100",
1693 "more_untyped": {
1694 "typed": "no",
1695 "scrubbed": "yes",
1696 "ccnumber": "5105105105105100"
1697 }
1698 }
1699 }"#,
1700 )
1701 .unwrap();
1702
1703 let ds_config = DataScrubbingConfig {
1704 scrub_data: true,
1705 scrub_defaults: true,
1706 ..Default::default()
1707 };
1708 let pii_config = ds_config.pii_config().as_ref().unwrap();
1709 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1710
1711 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1712 assert_annotated_snapshot!(span);
1713 }
1714
1715 #[test]
1716 fn test_span_data_pii() {
1717 let mut span = Span::from_value(
1718 json!({
1719 "data": {
1720 "code.filepath": "src/sentry/api/authentication.py",
1721 }
1722 })
1723 .into(),
1724 );
1725
1726 let ds_config = DataScrubbingConfig {
1727 scrub_data: true,
1728 scrub_defaults: true,
1729 ..Default::default()
1730 };
1731 let pii_config = ds_config.pii_config().as_ref().unwrap();
1732
1733 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1734 processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1735 assert_eq!(
1736 get_value!(span.data.code_filepath!).as_str(),
1737 Some("src/sentry/api/authentication.py")
1738 );
1739 }
1740
1741 #[test]
1742 fn test_csp_source_file_pii() {
1743 let mut event = Event::from_value(
1744 json!({
1745 "csp": {
1746 "source_file": "authentication.js",
1747 }
1748 })
1749 .into(),
1750 );
1751
1752 let config = serde_json::from_str::<PiiConfig>(
1753 r#"
1754 {
1755 "applications": {
1756 "csp.source_file": ["@anything:filter"]
1757 }
1758 }
1759 "#,
1760 )
1761 .unwrap();
1762
1763 let mut pii_processor = PiiProcessor::new(config.compiled());
1764 processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1765 assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1766 }
1767
1768 #[test]
1769 fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1770 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1771 r#"{
1772 "data": {
1773 "http": {
1774 "query": "dance=true"
1775 }
1776 }
1777 }"#,
1778 )
1779 .unwrap();
1780
1781 let ds_config = DataScrubbingConfig {
1782 scrub_data: true,
1783 scrub_defaults: true,
1784 ..Default::default()
1785 };
1786 let pii_config = ds_config.pii_config().as_ref().unwrap();
1787 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1788 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1789 assert_annotated_snapshot!(breadcrumb);
1790 }
1791
1792 #[test]
1793 fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1794 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1795 r#"{
1796 "data": {
1797 "http": {
1798 "query": "ccnumber=5105105105105100&process_id=123",
1799 "fragment": "ccnumber=5105105105105100,process_id=123"
1800 }
1801 }
1802 }"#,
1803 )
1804 .unwrap();
1805
1806 let ds_config = DataScrubbingConfig {
1807 scrub_data: true,
1808 scrub_defaults: true,
1809 ..Default::default()
1810 };
1811 let pii_config = ds_config.pii_config().as_ref().unwrap();
1812 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1813 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1814 assert_annotated_snapshot!(breadcrumb);
1815 }
1816
1817 #[test]
1818 fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1819 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1820 r#"{
1821 "data": {
1822 "http": {
1823 "query": {
1824 "ccnumber": "5105105105105100",
1825 "process_id": "123"
1826 },
1827 "fragment": {
1828 "ccnumber": "5105105105105100",
1829 "process_id": "123"
1830 }
1831 }
1832 }
1833 }"#,
1834 )
1835 .unwrap();
1836
1837 let ds_config = DataScrubbingConfig {
1838 scrub_data: true,
1839 scrub_defaults: true,
1840 ..Default::default()
1841 };
1842 let pii_config = ds_config.pii_config().as_ref().unwrap();
1843 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1844
1845 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1846 assert_annotated_snapshot!(breadcrumb);
1847 }
1848
1849 #[test]
1850 fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1851 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1852 r#"{
1853 "data": {
1854 "untyped": "ccnumber=5105105105105100",
1855 "more_untyped": {
1856 "typed": "no",
1857 "scrubbed": "yes",
1858 "ccnumber": "5105105105105100"
1859 }
1860 }
1861 }"#,
1862 )
1863 .unwrap();
1864
1865 let ds_config = DataScrubbingConfig {
1866 scrub_data: true,
1867 scrub_defaults: true,
1868 ..Default::default()
1869 };
1870 let pii_config = ds_config.pii_config().as_ref().unwrap();
1871 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1872 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1873 assert_annotated_snapshot!(breadcrumb);
1874 }
1875
1876 #[test]
1877 fn test_scrub_graphql_response_data_with_variables() {
1878 let mut data = Event::from_value(
1879 json!({
1880 "request": {
1881 "data": {
1882 "query": "{\n viewer {\n login\n }\n}",
1883 "variables": {
1884 "login": "foo"
1885 }
1886 },
1887 "api_target": "graphql"
1888 },
1889 "contexts": {
1890 "response": {
1891 "type": "response",
1892 "data": {
1893 "data": {
1894 "viewer": {
1895 "login": "foo"
1896 }
1897 }
1898 }
1899 }
1900 }
1901 })
1902 .into(),
1903 );
1904
1905 scrub_graphql(data.value_mut().as_mut().unwrap());
1906
1907 assert_debug_snapshot!(&data);
1908 }
1909
1910 #[test]
1911 fn test_scrub_graphql_response_data_without_variables() {
1912 let mut data = Event::from_value(
1913 json!({
1914 "request": {
1915 "data": {
1916 "query": "{\n viewer {\n login\n }\n}"
1917 },
1918 "api_target": "graphql"
1919 },
1920 "contexts": {
1921 "response": {
1922 "type": "response",
1923 "data": {
1924 "data": {
1925 "viewer": {
1926 "login": "foo"
1927 }
1928 }
1929 }
1930 }
1931 }
1932 })
1933 .into(),
1934 );
1935
1936 scrub_graphql(data.value_mut().as_mut().unwrap());
1937 assert_debug_snapshot!(&data);
1938 }
1939
1940 #[test]
1941 fn test_does_not_scrub_if_no_graphql() {
1942 let mut data = Event::from_value(
1943 json!({
1944 "request": {
1945 "data": {
1946 "query": "{\n viewer {\n login\n }\n}",
1947 "variables": {
1948 "login": "foo"
1949 }
1950 },
1951 },
1952 "contexts": {
1953 "response": {
1954 "type": "response",
1955 "data": {
1956 "data": {
1957 "viewer": {
1958 "login": "foo"
1959 }
1960 }
1961 }
1962 }
1963 }
1964 })
1965 .into(),
1966 );
1967
1968 let scrubbing_config = DataScrubbingConfig {
1969 scrub_data: true,
1970 scrub_ip_addresses: true,
1971 scrub_defaults: true,
1972 ..Default::default()
1973 };
1974
1975 let pii_config = to_pii_config(&scrubbing_config).unwrap();
1976 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1977
1978 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1979
1980 assert_debug_snapshot!(&data);
1981 }
1982
1983 #[test]
1984 fn test_logentry_params_scrubbed() {
1985 let config = serde_json::from_str::<PiiConfig>(
1986 r##"
1987 {
1988 "applications": {
1989 "$string": ["@anything:remove"]
1990 }
1991 }
1992 "##,
1993 )
1994 .unwrap();
1995
1996 let mut event = Annotated::new(Event {
1997 logentry: Annotated::new(LogEntry {
1998 message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
1999 formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
2000 params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
2001 "12345".to_owned(),
2002 ))])),
2003 ..Default::default()
2004 }),
2005 ..Default::default()
2006 });
2007
2008 let mut processor = PiiProcessor::new(config.compiled());
2009 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2010
2011 let params = get_value!(event.logentry.params!);
2012 assert_debug_snapshot!(params, @r###"
2013 Array(
2014 [
2015 Meta {
2016 remarks: [
2017 Remark {
2018 ty: Removed,
2019 rule_id: "@anything:remove",
2020 range: None,
2021 },
2022 ],
2023 errors: [],
2024 original_length: None,
2025 original_value: None,
2026 },
2027 ],
2028 )
2029 "###);
2030 }
2031
2032 #[test]
2033 fn test_is_pairlist() {
2034 for (case, expected) in [
2035 (r#"[]"#, false),
2036 (r#"["foo"]"#, false),
2037 (r#"["foo", 123]"#, false),
2038 (r#"[[1, "foo"]]"#, false),
2039 (r#"[[["too_nested", 123]]]"#, false),
2040 (r#"[["foo", "bar"], [1, "foo"]]"#, false),
2041 (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
2042 (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
2043 (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
2044 (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
2045 (r#"[["foo", 123]]"#, true),
2046 (r#"[["foo", "bar"]]"#, true),
2047 (
2048 r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
2049 true,
2050 ),
2051 ] {
2052 let v = Annotated::<Value>::from_json(case).unwrap();
2053 let Annotated(Some(Value::Array(mut a)), _) = v else {
2054 panic!()
2055 };
2056 assert_eq!(is_pairlist(&mut a), expected, "{case}");
2057 }
2058 }
2059
    /// Applies two differently-phrased path selectors, one after the other, to a frame variable
    /// `headers` that is an array of `[key, value]` pairs, and checks after each pass that the
    /// value next to `authorization` reads `[Filtered]` with an `@anything:replace` remark.
    ///
    /// NOTE(review): `event` is built once and mutated by every iteration, so the second config
    /// only ever runs on data the first one already replaced. Confirm whether each config
    /// should instead be checked against a fresh event.
    #[test]
    fn test_tuple_array_scrubbed_with_path_selector() {
        let configs = vec![
            // Selector addressing the pair's value through the pair's key (`authorization`).
            r##"
                {
                    "applications": {
                        "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
                    }
                }
                "##,
            // Selector addressing the same value positionally: element 1 of the array at index 0.
            r##"
                {
                    "applications": {
                        "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
                    }
                }
                "##,
        ];

        let mut event = Event::from_value(
            serde_json::json!(
            {
              "message": "hi",
              "exception": {
                "values": [
                  {
                    "type": "BrokenException",
                    "value": "Something failed",
                    "stacktrace": {
                      "frames": [
                        {
                            "vars": {
                                "headers": [
                                    ["authorization", "Bearer abc123"]
                                ]
                            }
                        }
                      ]
                    }
                  }
                ]
              }
            })
            .into(),
        );

        for config in configs {
            let config = serde_json::from_str::<PiiConfig>(config).unwrap();
            let mut processor = PiiProcessor::new(config.compiled());
            process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();

            let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();

            // The same inline snapshot is asserted once per config, hence `allow_duplicates!`.
            allow_duplicates!(assert_debug_snapshot!(vars, @r###"
            FrameVars(
                {
                    "headers": Array(
                        [
                            Array(
                                [
                                    String(
                                        "authorization",
                                    ),
                                    Annotated(
                                        String(
                                            "[Filtered]",
                                        ),
                                        Meta {
                                            remarks: [
                                                Remark {
                                                    ty: Substituted,
                                                    rule_id: "@anything:replace",
                                                    range: Some(
                                                        (
                                                            0,
                                                            10,
                                                        ),
                                                    ),
                                                },
                                            ],
                                            errors: [],
                                            original_length: Some(
                                                13,
                                            ),
                                            original_value: None,
                                        },
                                    ),
                                ],
                            ),
                        ],
                    ),
                },
            )
            "###));
        }
    }
2160
2161 #[test]
2162 fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2163 let config = serde_json::from_str::<PiiConfig>(
2164 r##"
2165 {
2166 "applications": {
2167 "$string": ["@password:remove"]
2168 }
2169 }
2170 "##,
2171 )
2172 .unwrap();
2173
2174 let mut event = Event::from_value(
2175 serde_json::json!(
2176 {
2177 "message": "hi",
2178 "exception": {
2179 "values": [
2180 {
2181 "type": "BrokenException",
2182 "value": "Something failed",
2183 "stacktrace": {
2184 "frames": [
2185 {
2186 "vars": {
2187 "headers": [
2188 ["authorization", "abc123"]
2189 ]
2190 }
2191 }
2192 ]
2193 }
2194 }
2195 ]
2196 }
2197 })
2198 .into(),
2199 );
2200
2201 let mut processor = PiiProcessor::new(config.compiled());
2202 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2203
2204 let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2205
2206 assert_debug_snapshot!(vars, @r###"
2207 FrameVars(
2208 {
2209 "headers": Array(
2210 [
2211 Array(
2212 [
2213 String(
2214 "authorization",
2215 ),
2216 Meta {
2217 remarks: [
2218 Remark {
2219 ty: Removed,
2220 rule_id: "@password:remove",
2221 range: None,
2222 },
2223 ],
2224 errors: [],
2225 original_length: None,
2226 original_value: None,
2227 },
2228 ],
2229 ),
2230 ],
2231 ),
2232 },
2233 )
2234 "###);
2235 }
2236}