1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::mem;
4use std::sync::OnceLock;
5
6use regex::Regex;
7use relay_event_schema::processor::{
8 self, Chunk, FieldAttrs, Pii, ProcessValue, ProcessingAction, ProcessingResult,
9 ProcessingState, Processor, ValueType, enum_set, process_value,
10};
11use relay_event_schema::protocol::{
12 AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User,
13};
14use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value};
15
16use crate::compiledconfig::{CompiledPiiConfig, RuleRef};
17use crate::config::RuleType;
18use crate::redactions::Redaction;
19use crate::regexes::{self, ANYTHING_REGEX, PatternType, ReplaceBehavior};
20use crate::utils;
21
/// Selects how [`PiiProcessor`] handles attribute collections in `process_attributes`.
#[derive(Debug, Clone, Copy)]
pub enum AttributeMode {
    /// Process the attribute collection through its regular child values.
    Object,
    /// Process only each attribute's inner value, entered under the attribute's key.
    ValueOnly,
}
32
33pub struct PiiProcessor<'a> {
35 attribute_mode: AttributeMode,
37 compiled_config: &'a CompiledPiiConfig,
38}
39
40impl<'a> PiiProcessor<'a> {
41 pub fn new(compiled_config: &'a CompiledPiiConfig) -> PiiProcessor<'a> {
43 PiiProcessor {
46 compiled_config,
47 attribute_mode: AttributeMode::Object,
48 }
49 }
50
51 pub fn attribute_mode(mut self, attribute_mode: AttributeMode) -> Self {
53 self.attribute_mode = attribute_mode;
54 self
55 }
56
57 fn apply_all_rules(
58 &self,
59 meta: &mut Meta,
60 state: &ProcessingState<'_>,
61 mut value: Option<&mut String>,
62 ) -> ProcessingResult {
63 let pii = state.pii();
64 if pii == Pii::False {
65 return Ok(());
66 }
67
68 for (selector, rules) in self.compiled_config.applications.iter() {
69 if selector.matches_path(&state.path()) {
70 #[allow(clippy::needless_option_as_deref)]
71 for rule in rules {
72 let reborrowed_value = value.as_deref_mut();
73 apply_rule_to_value(meta, rule, state.path().key(), reborrowed_value)?;
74 }
75 }
76 }
77
78 Ok(())
79 }
80}
81
82impl Processor for PiiProcessor<'_> {
83 fn before_process<T: ProcessValue>(
84 &mut self,
85 value: Option<&T>,
86 meta: &mut Meta,
87 state: &ProcessingState<'_>,
88 ) -> ProcessingResult {
89 if let Some(Value::String(original_value)) = meta.original_value_as_mut() {
90 if let Some(parent) = state.iter().next() {
94 let path = state.path();
95 let new_state = parent.enter_borrowed(
96 path.key().unwrap_or(""),
97 Some(Cow::Borrowed(state.attrs())),
98 enum_set!(ValueType::String),
99 );
100
101 if self
102 .apply_all_rules(&mut Meta::default(), &new_state, Some(original_value))
103 .is_err()
104 {
105 meta.set_original_value(Option::<String>::None);
107 }
108 }
109 }
110
111 if state.value_type().contains(ValueType::Boolean)
113 || state.value_type().contains(ValueType::String)
114 {
115 return Ok(());
116 }
117
118 if value.is_none() {
119 return Ok(());
120 }
121
122 self.apply_all_rules(meta, state, None)
124 }
125
126 fn process_array<T>(
127 &mut self,
128 array: &mut Array<T>,
129 _meta: &mut Meta,
130 state: &ProcessingState<'_>,
131 ) -> ProcessingResult
132 where
133 T: ProcessValue,
134 {
135 if is_pairlist(array) {
136 for annotated in array {
137 let mut mapped = mem::take(annotated).map_value(T::into_value);
138
139 if let Some(Value::Array(pair)) = mapped.value_mut() {
140 let mut value = mem::take(&mut pair[1]);
141 let value_type = ValueType::for_field(&value);
142
143 if let Some(key_name) = &pair[0].as_str() {
144 let key_state =
147 state.enter_borrowed(key_name, state.inner_attrs(), value_type);
148 process_value(&mut value, self, &key_state)?;
151 }
152
153 pair[1] = value;
155 }
156
157 *annotated = T::from_value(mapped);
159 }
160
161 Ok(())
162 } else {
163 array.process_child_values(self, state)
165 }
166 }
167
168 fn process_string(
169 &mut self,
170 value: &mut String,
171 meta: &mut Meta,
172 state: &ProcessingState<'_>,
173 ) -> ProcessingResult {
174 if let "" | "true" | "false" | "null" | "undefined" = value.as_str() {
175 return Ok(());
176 }
177
178 self.apply_all_rules(meta, state, Some(value))
181 }
182
183 fn process_native_image_path(
184 &mut self,
185 NativeImagePath(value): &mut NativeImagePath,
186 meta: &mut Meta,
187 state: &ProcessingState<'_>,
188 ) -> ProcessingResult {
189 if let Some(index) = value.rfind(['/', '\\']) {
200 let basename = value.split_off(index);
201 match self.process_string(value, meta, state) {
202 Ok(()) => value.push_str(&basename),
203 Err(ProcessingAction::DeleteValueHard) | Err(ProcessingAction::DeleteValueSoft) => {
204 basename[1..].clone_into(value);
205 }
206 Err(ProcessingAction::InvalidTransaction(x)) => {
207 return Err(ProcessingAction::InvalidTransaction(x));
208 }
209 }
210 }
211
212 Ok(())
213 }
214
    /// Processes a pair list by delegating to `utils::process_pairlist` with this processor.
    ///
    /// The pair list's own meta is not used here.
    fn process_pairlist<T: ProcessValue + AsPair>(
        &mut self,
        value: &mut PairList<T>,
        _meta: &mut Meta,
        state: &ProcessingState,
    ) -> ProcessingResult {
        utils::process_pairlist(self, value, state)
    }
223
224 fn process_attributes(
225 &mut self,
226 value: &mut relay_event_schema::protocol::Attributes,
227 _meta: &mut Meta,
228 state: &ProcessingState,
229 ) -> ProcessingResult {
230 match self.attribute_mode {
231 AttributeMode::Object => value.process_child_values(self, state),
233 AttributeMode::ValueOnly => {
235 for (key, attribute) in value.0.iter_mut() {
236 let Some(attribute) = attribute.value_mut() else {
237 continue;
238 };
239
240 let attrs = FieldAttrs::new()
245 .pii_dynamic(relay_event_schema::protocol::attribute_pii_from_conventions);
246 let inner_value = &mut attribute.value.value;
247 let inner_value_type = ValueType::for_field(inner_value);
248 let entered =
249 state.enter_borrowed(key, Some(Cow::Borrowed(&attrs)), inner_value_type);
250
251 processor::process_value(inner_value, self, &entered)?;
252 self.process_other(&mut attribute.other, state)?;
253 }
254 Ok(())
255 }
256 }
257 }
258
259 fn process_user(
260 &mut self,
261 user: &mut User,
262 _meta: &mut Meta,
263 state: &ProcessingState<'_>,
264 ) -> ProcessingResult {
265 let ip_was_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
266
267 user.process_child_values(self, state)?;
269
270 let has_other_fields = user.id.value().is_some()
271 || user.username.value().is_some()
272 || user.email.value().is_some();
273
274 let ip_is_still_valid = user.ip_address.value().is_none_or(IpAddr::is_valid);
275
276 if ip_was_valid && !has_other_fields && !ip_is_still_valid {
286 user.id = mem::take(&mut user.ip_address).map_value(|ip| ip.into_inner().into());
287 user.ip_address.meta_mut().add_remark(Remark::new(
288 RemarkType::Removed,
289 "pii:ip_address".to_owned(),
290 ));
291 }
292
293 Ok(())
294 }
295
296 fn process_replay(
298 &mut self,
299 replay: &mut Replay,
300 _meta: &mut Meta,
301 state: &ProcessingState<'_>,
302 ) -> ProcessingResult {
303 replay.process_child_values(self, state)?;
304 Ok(())
305 }
306}
307
308#[derive(Default)]
309struct PairListProcessor {
310 is_pair: bool,
311 has_string_key: bool,
312}
313
314impl PairListProcessor {
315 fn is_pair_array(&self) -> bool {
318 self.is_pair && self.has_string_key
319 }
320}
321
322impl Processor for PairListProcessor {
323 fn process_array<T>(
324 &mut self,
325 value: &mut Array<T>,
326 _meta: &mut Meta,
327 state: &ProcessingState<'_>,
328 ) -> ProcessingResult
329 where
330 T: ProcessValue,
331 {
332 self.is_pair = state.depth() == 0 && value.len() == 2;
333 if self.is_pair {
334 let key_type = ValueType::for_field(&value[0]);
335 process_value(
336 &mut value[0],
337 self,
338 &state.enter_index(0, state.inner_attrs(), key_type),
339 )?;
340 }
341
342 Ok(())
343 }
344
345 fn process_string(
346 &mut self,
347 _value: &mut String,
348 _meta: &mut Meta,
349 state: &ProcessingState<'_>,
350 ) -> ProcessingResult where {
351 if state.depth() == 1 && state.path().index() == Some(0) {
352 self.has_string_key = true;
353 }
354
355 Ok(())
356 }
357}
358
359fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool {
360 for element in array.iter_mut() {
361 let mut visitor = PairListProcessor::default();
362 process_value(element, &mut visitor, ProcessingState::root()).ok();
363 if !visitor.is_pair_array() {
364 return false;
365 }
366 }
367
368 !array.is_empty()
369}
370
371pub fn scrub_graphql(event: &mut Event) {
373 let mut keys: BTreeSet<&str> = BTreeSet::new();
374
375 let mut is_graphql = false;
376
377 if let Some(request) = event.request.value_mut()
379 && let Some(Value::Object(data)) = request.data.value_mut()
380 {
381 if let Some(api_target) = request.api_target.value()
382 && api_target.eq_ignore_ascii_case("graphql")
383 {
384 is_graphql = true;
385 }
386
387 if is_graphql
388 && let Some(Annotated(Some(Value::Object(variables)), _)) = data.get_mut("variables")
389 {
390 for (key, value) in variables.iter_mut() {
391 keys.insert(key);
392 value.set_value(Some(Value::String("[Filtered]".to_owned())));
393 }
394 }
395 }
396
397 if !is_graphql {
398 return;
399 }
400
401 if let Some(contexts) = event.contexts.value_mut()
403 && let Some(response) = contexts.get_mut::<ResponseContext>()
404 && let Some(Value::Object(data)) = response.data.value_mut()
405 && let Some(Annotated(Some(Value::Object(graphql_data)), _)) = data.get_mut("data")
406 {
407 if !keys.is_empty() {
408 scrub_graphql_data(&keys, graphql_data);
409 } else {
410 data.remove("data");
413 }
414 }
415}
416
417fn scrub_graphql_data(keys: &BTreeSet<&str>, data: &mut BTreeMap<String, Annotated<Value>>) {
419 for (key, value) in data.iter_mut() {
420 match value.value_mut() {
421 Some(Value::Object(item_data)) => {
422 scrub_graphql_data(keys, item_data);
423 }
424 _ => {
425 if keys.contains(key.as_str()) {
426 value.set_value(Some(Value::String("[Filtered]".to_owned())));
427 }
428 }
429 }
430 }
431}
432
433fn apply_rule_to_value(
434 meta: &mut Meta,
435 rule: &RuleRef,
436 key: Option<&str>,
437 mut value: Option<&mut String>,
438) -> ProcessingResult {
439 let should_redact_chunks = !matches!(rule.redaction, Redaction::Default | Redaction::Remove);
442
443 if rule.ty == RuleType::Anything && (value.is_none() || !should_redact_chunks) {
446 meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
448 return Err(ProcessingAction::DeleteValueHard);
449 }
450
451 macro_rules! apply_regex {
452 ($regex:expr, $replace_behavior:expr) => {
453 if let Some(ref mut value) = value {
454 processor::process_chunked_value(value, meta, |chunks| {
455 apply_regex_to_chunks(chunks, rule, $regex, $replace_behavior)
456 });
457 }
458 };
459 }
460
461 for (pattern_type, regex, replace_behavior) in regexes::get_regex_for_rule_type(&rule.ty) {
462 match pattern_type {
463 PatternType::KeyValue => {
464 if regex.is_match(key.unwrap_or("")) {
465 if value.is_some() && should_redact_chunks {
466 apply_regex!(&ANYTHING_REGEX, replace_behavior);
469 } else {
470 meta.add_remark(Remark::new(RemarkType::Removed, rule.origin.clone()));
471 return Err(ProcessingAction::DeleteValueHard);
472 }
473 } else {
474 apply_regex!(regex, replace_behavior);
477 }
478 }
479 PatternType::Value => {
480 apply_regex!(regex, replace_behavior);
481 }
482 }
483 }
484
485 Ok(())
486}
487
488fn apply_regex_to_chunks<'a>(
489 chunks: Vec<Chunk<'a>>,
490 rule: &RuleRef,
491 regex: &Regex,
492 replace_behavior: ReplaceBehavior,
493) -> Vec<Chunk<'a>> {
494 let mut search_string = String::new();
499 let mut has_text = false;
500 for chunk in &chunks {
501 match chunk {
502 Chunk::Text { text } => {
503 has_text = true;
504 search_string.push_str(&text.replace('\x00', ""));
505 }
506 Chunk::Redaction { .. } => search_string.push('\x00'),
507 }
508 }
509
510 if !has_text {
511 return chunks;
513 }
514
515 let mut captures_iter = regex.captures_iter(&search_string).peekable();
517 if captures_iter.peek().is_none() {
518 return chunks;
519 }
520
521 let mut replacement_chunks = vec![];
522 for chunk in chunks {
523 if let Chunk::Redaction { .. } = chunk {
524 replacement_chunks.push(chunk);
525 }
526 }
527 replacement_chunks.reverse();
528
529 fn process_text<'a>(
530 text: &str,
531 rv: &mut Vec<Chunk<'a>>,
532 replacement_chunks: &mut Vec<Chunk<'a>>,
533 ) {
534 if text.is_empty() {
535 return;
536 }
537
538 static NULL_SPLIT_RE: OnceLock<Regex> = OnceLock::new();
539 let regex = NULL_SPLIT_RE.get_or_init(|| {
540 #[allow(clippy::trivial_regex)]
541 Regex::new("\x00").unwrap()
542 });
543
544 let mut pos = 0;
545 for piece in regex.find_iter(text) {
546 rv.push(Chunk::Text {
547 text: Cow::Owned(text[pos..piece.start()].to_string()),
548 });
549 rv.push(replacement_chunks.pop().unwrap());
550 pos = piece.end();
551 }
552
553 rv.push(Chunk::Text {
554 text: Cow::Owned(text[pos..].to_string()),
555 });
556 }
557
558 let mut pos = 0;
559 let mut rv = Vec::with_capacity(replacement_chunks.len());
560
561 match replace_behavior {
562 ReplaceBehavior::Groups(ref groups) => {
563 for m in captures_iter {
564 for (idx, g) in m.iter().enumerate() {
565 if let Some(g) = g
566 && groups.contains(&(idx as u8))
567 {
568 process_text(
569 &search_string[pos..g.start()],
570 &mut rv,
571 &mut replacement_chunks,
572 );
573 insert_replacement_chunks(rule, g.as_str(), &mut rv);
574 pos = g.end();
575 }
576 }
577 }
578 process_text(&search_string[pos..], &mut rv, &mut replacement_chunks);
579 debug_assert!(replacement_chunks.is_empty());
580 }
581 ReplaceBehavior::Value => {
582 insert_replacement_chunks(rule, &search_string, &mut rv);
586 }
587 }
588 rv
589}
590
591fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec<Chunk<'_>>) {
592 match &rule.redaction {
593 Redaction::Default | Redaction::Remove => {
594 output.push(Chunk::Redaction {
595 text: Cow::Borrowed(""),
596 rule_id: Cow::Owned(rule.origin.to_string()),
597 ty: RemarkType::Removed,
598 });
599 }
600 Redaction::Mask => {
601 let buf = vec!['*'; text.chars().count()];
602
603 output.push(Chunk::Redaction {
604 ty: RemarkType::Masked,
605 rule_id: Cow::Owned(rule.origin.to_string()),
606 text: buf.into_iter().collect(),
607 })
608 }
609 Redaction::Hash => {
610 output.push(Chunk::Redaction {
611 ty: RemarkType::Pseudonymized,
612 rule_id: Cow::Owned(rule.origin.to_string()),
613 text: Cow::Owned(utils::hash_value(text.as_bytes())),
614 });
615 }
616 Redaction::Replace(replace) => {
617 output.push(Chunk::Redaction {
618 ty: RemarkType::Substituted,
619 rule_id: Cow::Owned(rule.origin.to_string()),
620 text: Cow::Owned(replace.text.clone()),
621 });
622 }
623 Redaction::Other => relay_log::debug!("Incoming redaction is not supported"),
624 }
625}
626
627#[cfg(test)]
628mod tests {
629 use insta::{allow_duplicates, assert_debug_snapshot};
630 use relay_event_schema::processor::process_value;
631 use relay_event_schema::protocol::{
632 Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message,
633 NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext,
634 };
635 use relay_protocol::{FromValue, Object, assert_annotated_snapshot, get_value};
636 use serde_json::json;
637
638 use super::*;
639 use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction};
640
641 fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option<PiiConfig> {
642 use crate::convert::to_pii_config as to_pii_config_impl;
643 let rv = to_pii_config_impl(datascrubbing_config).unwrap();
644 if let Some(ref config) = rv {
645 let roundtrip: PiiConfig =
646 serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap();
647 assert_eq!(&roundtrip, config);
648 }
649 rv
650 }
651
652 #[test]
653 fn test_scrub_original_value() {
654 let mut data = Event::from_value(
655 json!({
656 "user": {
657 "username": "hey man 73.133.27.120", "ip_address": "is this an ip address? 73.133.27.120", },
660 "hpkp":"invalid data my ip address is 74.133.27.120 and my credit card number is 4571234567890111 ",
661 })
662 .into(),
663 );
664
665 let scrubbing_config = DataScrubbingConfig {
666 scrub_data: true,
667 scrub_ip_addresses: true,
668 scrub_defaults: true,
669 ..Default::default()
670 };
671
672 let pii_config = to_pii_config(&scrubbing_config).unwrap();
673 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
674
675 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
676
677 assert_debug_snapshot!(&data);
678 }
679
680 #[test]
681 fn test_sentry_user() {
682 let mut data = Event::from_value(
683 json!({
684 "user": {
685 "ip_address": "73.133.27.120",
686 "sentry_user": "ip:73.133.27.120",
687 },
688 })
689 .into(),
690 );
691
692 let scrubbing_config = DataScrubbingConfig {
693 scrub_data: true,
694 scrub_ip_addresses: true,
695 scrub_defaults: true,
696 ..Default::default()
697 };
698
699 let pii_config = to_pii_config(&scrubbing_config).unwrap();
700 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
701
702 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
703
704 assert_debug_snapshot!(&data);
705 }
706
707 #[test]
708 fn test_basic_stripping() {
709 let config = serde_json::from_str::<PiiConfig>(
710 r#"
711 {
712 "rules": {
713 "remove_bad_headers": {
714 "type": "redact_pair",
715 "keyPattern": "(?i)cookie|secret[-_]?key"
716 }
717 },
718 "applications": {
719 "$string": ["@ip"],
720 "$object.**": ["remove_bad_headers"]
721 }
722 }
723 "#,
724 )
725 .unwrap();
726
727 let mut event = Annotated::new(Event {
728 logentry: Annotated::new(LogEntry {
729 formatted: Annotated::new("Hello world!".to_owned().into()),
730 ..Default::default()
731 }),
732 request: Annotated::new(Request {
733 env: {
734 let mut rv = Object::new();
735 rv.insert(
736 "SECRET_KEY".to_owned(),
737 Annotated::new(Value::String("134141231231231231231312".into())),
738 );
739 Annotated::new(rv)
740 },
741 headers: {
742 let rv = vec![
743 Annotated::new((
744 Annotated::new("Cookie".to_owned().into()),
745 Annotated::new("super secret".to_owned().into()),
746 )),
747 Annotated::new((
748 Annotated::new("X-Forwarded-For".to_owned().into()),
749 Annotated::new("127.0.0.1".to_owned().into()),
750 )),
751 ];
752 Annotated::new(Headers(PairList(rv)))
753 },
754 ..Default::default()
755 }),
756 tags: Annotated::new(Tags(
757 vec![Annotated::new(TagEntry(
758 Annotated::new("forwarded_for".to_owned()),
759 Annotated::new("127.0.0.1".to_owned()),
760 ))]
761 .into(),
762 )),
763 ..Default::default()
764 });
765
766 let mut processor = PiiProcessor::new(config.compiled());
767 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
768 assert_annotated_snapshot!(event);
769 }
770
771 #[test]
772 fn test_redact_containers() {
773 let config = serde_json::from_str::<PiiConfig>(
774 r#"
775 {
776 "applications": {
777 "$object": ["@anything"]
778 }
779 }
780 "#,
781 )
782 .unwrap();
783
784 let mut event = Annotated::new(Event {
785 extra: {
786 let mut map = Object::new();
787 map.insert(
788 "foo".to_owned(),
789 Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
790 );
791 Annotated::new(map)
792 },
793 ..Default::default()
794 });
795
796 let mut processor = PiiProcessor::new(config.compiled());
797 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
798 assert_annotated_snapshot!(event);
799 }
800
801 #[test]
802 fn test_redact_custom_pattern() {
803 let config = serde_json::from_str::<PiiConfig>(
804 r#"
805 {
806 "applications": {
807 "$string": ["myrule"]
808 },
809 "rules": {
810 "myrule": {
811 "type": "pattern",
812 "pattern": "foo",
813 "redaction": {
814 "method": "replace",
815 "text": "asd"
816 }
817 }
818 }
819 }
820 "#,
821 )
822 .unwrap();
823
824 let mut event = Annotated::new(Event {
825 extra: {
826 let mut map = Object::new();
827 map.insert(
828 "myvalue".to_owned(),
829 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
830 );
831 Annotated::new(map)
832 },
833 ..Default::default()
834 });
835
836 let mut processor = PiiProcessor::new(config.compiled());
837 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
838 assert_annotated_snapshot!(event);
839 }
840
841 #[test]
842 fn test_redact_custom_negative_pattern() {
843 let config = serde_json::from_str::<PiiConfig>(
844 r#"
845 {
846 "applications": {
847 "$string": ["myrule"]
848 },
849 "rules": {
850 "myrule": {
851 "type": "pattern",
852 "pattern": "the good string|.*OK.*|(.*)",
853 "replaceGroups": [1],
854 "redaction": {
855 "method": "mask"
856 }
857 }
858 }
859 }
860 "#,
861 )
862 .unwrap();
863
864 let mut event = Annotated::<Event>::from_json(
865 r#"{
866 "extra": {
867 "1": "the good string",
868 "2": "a bad string",
869 "3": "another OK string",
870 "4": "another bad one"
871 }
872 }"#,
873 )
874 .unwrap();
875
876 let mut processor = PiiProcessor::new(config.compiled());
877 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
878 assert_annotated_snapshot!(event.value().unwrap().extra, @r#"
879 {
880 "1": "the good string",
881 "2": "************",
882 "3": "another OK string",
883 "4": "***************",
884 "_meta": {
885 "2": {
886 "": {
887 "rem": [
888 [
889 "myrule",
890 "m",
891 0,
892 12
893 ]
894 ],
895 "len": 12
896 }
897 },
898 "4": {
899 "": {
900 "rem": [
901 [
902 "myrule",
903 "m",
904 0,
905 15
906 ]
907 ],
908 "len": 15
909 }
910 }
911 }
912 }
913 "#);
914 }
915
916 #[test]
917 fn test_no_field_upsert() {
918 let config = serde_json::from_str::<PiiConfig>(
919 r#"
920 {
921 "applications": {
922 "**": ["@anything:remove"]
923 }
924 }
925 "#,
926 )
927 .unwrap();
928
929 let mut event = Annotated::new(Event {
930 extra: {
931 let mut map = Object::new();
932 map.insert(
933 "myvalue".to_owned(),
934 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
935 );
936 Annotated::new(map)
937 },
938 ..Default::default()
939 });
940
941 let mut processor = PiiProcessor::new(config.compiled());
942 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
943 assert_annotated_snapshot!(event);
944 }
945
946 #[test]
947 fn test_anything_hash_on_string() {
948 let config = serde_json::from_str::<PiiConfig>(
949 r#"
950 {
951 "applications": {
952 "$string": ["@anything:hash"]
953 }
954 }
955 "#,
956 )
957 .unwrap();
958
959 let mut event = Annotated::new(Event {
960 extra: {
961 let mut map = Object::new();
962 map.insert(
963 "myvalue".to_owned(),
964 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
965 );
966 Annotated::new(map)
967 },
968 ..Default::default()
969 });
970
971 let mut processor = PiiProcessor::new(config.compiled());
972 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
973 assert_annotated_snapshot!(event);
974 }
975
976 #[test]
977 fn test_anything_hash_on_container() {
978 let config = serde_json::from_str::<PiiConfig>(
979 r#"
980 {
981 "applications": {
982 "$object": ["@anything:hash"]
983 }
984 }
985 "#,
986 )
987 .unwrap();
988
989 let mut event = Annotated::new(Event {
990 extra: {
991 let mut map = Object::new();
992 map.insert(
993 "myvalue".to_owned(),
994 Annotated::new(ExtraValue(Value::String("foobar".to_owned()))),
995 );
996 Annotated::new(map)
997 },
998 ..Default::default()
999 });
1000
1001 let mut processor = PiiProcessor::new(config.compiled());
1002 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1003 assert_annotated_snapshot!(event);
1004 }
1005
1006 #[test]
1007 fn test_ignore_user_agent_ip_scrubbing() {
1008 let mut data = Event::from_value(
1009 json!({
1010 "request": {
1011 "headers": [
1012 ["User-Agent", "127.0.0.1"],
1013 ["X-Client-Ip", "10.0.0.1"]
1014 ]
1015 },
1016 })
1017 .into(),
1018 );
1019
1020 let scrubbing_config = DataScrubbingConfig {
1021 scrub_data: true,
1022 scrub_ip_addresses: true,
1023 scrub_defaults: true,
1024 ..Default::default()
1025 };
1026
1027 let pii_config = to_pii_config(&scrubbing_config).unwrap();
1028 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1029
1030 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1031
1032 assert_annotated_snapshot!(&data);
1033 }
1034
1035 #[test]
1036 fn test_remove_debugmeta_path() {
1037 let config = serde_json::from_str::<PiiConfig>(
1038 r#"
1039 {
1040 "applications": {
1041 "debug_meta.images.*.code_file": ["@anything:remove"],
1042 "debug_meta.images.*.debug_file": ["@anything:remove"]
1043 }
1044 }
1045 "#,
1046 )
1047 .unwrap();
1048
1049 let mut event = Annotated::new(Event {
1050 debug_meta: Annotated::new(DebugMeta {
1051 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1052 NativeDebugImage {
1053 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1054 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1055 debug_id: Annotated::new(
1056 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1057 ),
1058 debug_file: Annotated::new("wntdll.pdb".into()),
1059 debug_checksum: Annotated::empty(),
1060 arch: Annotated::new("arm64".to_owned()),
1061 image_addr: Annotated::new(Addr(0)),
1062 image_size: Annotated::new(4096),
1063 image_vmaddr: Annotated::new(Addr(32768)),
1064 other: {
1065 let mut map = Object::new();
1066 map.insert(
1067 "other".to_owned(),
1068 Annotated::new(Value::String("value".to_owned())),
1069 );
1070 map
1071 },
1072 },
1073 )))]),
1074 ..Default::default()
1075 }),
1076 ..Default::default()
1077 });
1078
1079 let mut processor = PiiProcessor::new(config.compiled());
1080 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1081 assert_annotated_snapshot!(event);
1082 }
1083
1084 #[test]
1085 fn test_replace_debugmeta_path() {
1086 let config = serde_json::from_str::<PiiConfig>(
1087 r#"
1088 {
1089 "applications": {
1090 "debug_meta.images.*.code_file": ["@anything:replace"],
1091 "debug_meta.images.*.debug_file": ["@anything:replace"]
1092 }
1093 }
1094 "#,
1095 )
1096 .unwrap();
1097
1098 let mut event = Annotated::new(Event {
1099 debug_meta: Annotated::new(DebugMeta {
1100 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1101 NativeDebugImage {
1102 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1103 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1104 debug_id: Annotated::new(
1105 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1106 ),
1107 debug_file: Annotated::new("wntdll.pdb".into()),
1108 debug_checksum: Annotated::empty(),
1109 arch: Annotated::new("arm64".to_owned()),
1110 image_addr: Annotated::new(Addr(0)),
1111 image_size: Annotated::new(4096),
1112 image_vmaddr: Annotated::new(Addr(32768)),
1113 other: {
1114 let mut map = Object::new();
1115 map.insert(
1116 "other".to_owned(),
1117 Annotated::new(Value::String("value".to_owned())),
1118 );
1119 map
1120 },
1121 },
1122 )))]),
1123 ..Default::default()
1124 }),
1125 ..Default::default()
1126 });
1127
1128 let mut processor = PiiProcessor::new(config.compiled());
1129 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1130 assert_annotated_snapshot!(event);
1131 }
1132
1133 #[test]
1134 fn test_hash_debugmeta_path() {
1135 let config = serde_json::from_str::<PiiConfig>(
1136 r#"
1137 {
1138 "applications": {
1139 "debug_meta.images.*.code_file": ["@anything:hash"],
1140 "debug_meta.images.*.debug_file": ["@anything:hash"]
1141 }
1142 }
1143 "#,
1144 )
1145 .unwrap();
1146
1147 let mut event = Annotated::new(Event {
1148 debug_meta: Annotated::new(DebugMeta {
1149 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1150 NativeDebugImage {
1151 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1152 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1153 debug_id: Annotated::new(
1154 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1155 ),
1156 debug_file: Annotated::new("wntdll.pdb".into()),
1157 debug_checksum: Annotated::empty(),
1158 arch: Annotated::new("arm64".to_owned()),
1159 image_addr: Annotated::new(Addr(0)),
1160 image_size: Annotated::new(4096),
1161 image_vmaddr: Annotated::new(Addr(32768)),
1162 other: {
1163 let mut map = Object::new();
1164 map.insert(
1165 "other".to_owned(),
1166 Annotated::new(Value::String("value".to_owned())),
1167 );
1168 map
1169 },
1170 },
1171 )))]),
1172 ..Default::default()
1173 }),
1174 ..Default::default()
1175 });
1176
1177 let mut processor = PiiProcessor::new(config.compiled());
1178 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1179 assert_annotated_snapshot!(event);
1180 }
1181
1182 #[test]
1183 fn test_debugmeta_path_not_addressible_with_wildcard_selector() {
1184 let config = serde_json::from_str::<PiiConfig>(
1185 r#"
1186 {
1187 "applications": {
1188 "$string": ["@anything:remove"],
1189 "**": ["@anything:remove"],
1190 "debug_meta.**": ["@anything:remove"],
1191 "(debug_meta.images.**.code_file & $string)": ["@anything:remove"]
1192 }
1193 }
1194 "#,
1195 )
1196 .unwrap();
1197
1198 let mut event = Annotated::new(Event {
1199 debug_meta: Annotated::new(DebugMeta {
1200 images: Annotated::new(vec![Annotated::new(DebugImage::Symbolic(Box::new(
1201 NativeDebugImage {
1202 code_id: Annotated::new("59b0d8f3183000".parse().unwrap()),
1203 code_file: Annotated::new("C:\\Windows\\System32\\ntdll.dll".into()),
1204 debug_id: Annotated::new(
1205 "971f98e5-ce60-41ff-b2d7-235bbeb34578-1".parse().unwrap(),
1206 ),
1207 debug_file: Annotated::new("wntdll.pdb".into()),
1208 debug_checksum: Annotated::empty(),
1209 arch: Annotated::new("arm64".to_owned()),
1210 image_addr: Annotated::new(Addr(0)),
1211 image_size: Annotated::new(4096),
1212 image_vmaddr: Annotated::new(Addr(32768)),
1213 other: {
1214 let mut map = Object::new();
1215 map.insert(
1216 "other".to_owned(),
1217 Annotated::new(Value::String("value".to_owned())),
1218 );
1219 map
1220 },
1221 },
1222 )))]),
1223 ..Default::default()
1224 }),
1225 ..Default::default()
1226 });
1227
1228 let mut processor = PiiProcessor::new(config.compiled());
1229 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1230 assert_annotated_snapshot!(event);
1231 }
1232
1233 #[test]
1234 fn test_quoted_keys() {
1235 let config = serde_json::from_str::<PiiConfig>(
1236 r#"
1237 {
1238 "applications": {
1239 "extra.'special ,./<>?!@#$%^&*())''gärbage'''": ["@anything:remove"]
1240 }
1241 }
1242 "#,
1243 )
1244 .unwrap();
1245
1246 let mut event = Annotated::new(Event {
1247 extra: {
1248 let mut map = Object::new();
1249 map.insert(
1250 "do not ,./<>?!@#$%^&*())'ßtrip'".to_owned(),
1251 Annotated::new(ExtraValue(Value::String("foo".to_owned()))),
1252 );
1253 map.insert(
1254 "special ,./<>?!@#$%^&*())'gärbage'".to_owned(),
1255 Annotated::new(ExtraValue(Value::String("bar".to_owned()))),
1256 );
1257 Annotated::new(map)
1258 },
1259 ..Default::default()
1260 });
1261
1262 let mut processor = PiiProcessor::new(config.compiled());
1263 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1264 assert_annotated_snapshot!(event);
1265 }
1266
1267 #[test]
1268 fn test_logentry_value_types() {
1269 for formatted_selector in &[
1271 "$logentry.formatted",
1272 "$message",
1273 "$logentry.formatted && $message",
1274 "$string",
1275 ] {
1276 let config = serde_json::from_str::<PiiConfig>(&format!(
1277 r##"
1278 {{
1279 "applications": {{
1280 "{formatted_selector}": ["@anything:remove"]
1281 }}
1282 }}
1283 "##
1284 ))
1285 .unwrap();
1286
1287 let mut event = Annotated::new(Event {
1288 logentry: Annotated::new(LogEntry {
1289 formatted: Annotated::new("Hello world!".to_owned().into()),
1290 ..Default::default()
1291 }),
1292 ..Default::default()
1293 });
1294
1295 let mut processor = PiiProcessor::new(config.compiled());
1296 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1297 assert!(
1298 event
1299 .value()
1300 .unwrap()
1301 .logentry
1302 .value()
1303 .unwrap()
1304 .formatted
1305 .value()
1306 .is_none()
1307 );
1308 }
1309 }
1310
1311 #[test]
1312 fn test_logentry_formatted_never_fully_filtered() {
1313 let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1316 scrub_data: true,
1317 scrub_defaults: true,
1318 scrub_ip_addresses: true,
1319 ..Default::default()
1320 })
1321 .unwrap()
1322 .unwrap();
1323
1324 let mut event = Annotated::new(Event {
1325 logentry: Annotated::new(LogEntry {
1326 formatted: Annotated::new(
1327 "User john.doe@company.com failed login with card 4111-1111-1111-1111"
1328 .to_owned()
1329 .into(),
1330 ),
1331 ..Default::default()
1332 }),
1333 ..Default::default()
1334 });
1335
1336 let mut processor = PiiProcessor::new(config.compiled());
1337 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1338 assert_annotated_snapshot!(event, @r#"
1339 {
1340 "logentry": {
1341 "formatted": "User [email] failed login with card [creditcard]"
1342 },
1343 "_meta": {
1344 "logentry": {
1345 "formatted": {
1346 "": {
1347 "rem": [
1348 [
1349 "@email:replace",
1350 "s",
1351 5,
1352 12
1353 ],
1354 [
1355 "@creditcard:replace",
1356 "s",
1357 36,
1358 48
1359 ]
1360 ],
1361 "len": 68
1362 }
1363 }
1364 }
1365 }
1366 }
1367 "#);
1368 }
1369
1370 #[test]
1371 fn test_logentry_formatted_bearer_token_scrubbing() {
1372 let config = crate::convert::to_pii_config(&crate::DataScrubbingConfig {
1374 scrub_data: true,
1375 scrub_defaults: true,
1376 ..Default::default()
1377 })
1378 .unwrap()
1379 .unwrap();
1380
1381 let mut event = Annotated::new(Event {
1382 logentry: Annotated::new(LogEntry {
1383 formatted: Annotated::new(
1384 "API request failed with Bearer ABC123XYZ789TOKEN and other data"
1385 .to_owned()
1386 .into(),
1387 ),
1388 ..Default::default()
1389 }),
1390 ..Default::default()
1391 });
1392
1393 let mut processor = PiiProcessor::new(config.compiled());
1394 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1395 assert_annotated_snapshot!(event, @r#"
1396 {
1397 "logentry": {
1398 "formatted": "API request failed with Bearer [token] and other data"
1399 },
1400 "_meta": {
1401 "logentry": {
1402 "formatted": {
1403 "": {
1404 "rem": [
1405 [
1406 "@bearer:replace",
1407 "s",
1408 24,
1409 38
1410 ]
1411 ],
1412 "len": 63
1413 }
1414 }
1415 }
1416 }
1417 }
1418 "#);
1419 }
1420
1421 #[test]
1422 fn test_logentry_formatted_password_word_not_scrubbed() {
1423 let config = PiiConfig::default();
1424 let mut event = Annotated::new(Event {
1425 logentry: Annotated::new(LogEntry {
1426 formatted: Annotated::new(
1427 "User password is secret123 for authentication"
1428 .to_owned()
1429 .into(),
1430 ),
1431 ..Default::default()
1432 }),
1433 ..Default::default()
1434 });
1435
1436 let mut processor = PiiProcessor::new(config.compiled());
1437 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1438 assert_annotated_snapshot!(event, @r#"
1439 {
1440 "logentry": {
1441 "formatted": "User password is secret123 for authentication"
1442 }
1443 }
1444 "#);
1445 }
1446
1447 #[test]
1448 fn test_ip_address_hashing() {
1449 let config = serde_json::from_str::<PiiConfig>(
1450 r#"
1451 {
1452 "applications": {
1453 "$user.ip_address": ["@ip:hash"]
1454 }
1455 }
1456 "#,
1457 )
1458 .unwrap();
1459
1460 let mut event = Annotated::new(Event {
1461 user: Annotated::new(User {
1462 ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1463 ..Default::default()
1464 }),
1465 ..Default::default()
1466 });
1467
1468 let mut processor = PiiProcessor::new(config.compiled());
1469 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1470
1471 let user = event.value().unwrap().user.value().unwrap();
1472
1473 assert!(user.ip_address.value().is_none());
1474
1475 assert_eq!(
1476 user.id.value().unwrap().as_str(),
1477 "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1478 );
1479 }
1480
1481 #[test]
1482 fn test_ip_address_hashing_does_not_overwrite_id() {
1483 let config = serde_json::from_str::<PiiConfig>(
1484 r#"
1485 {
1486 "applications": {
1487 "$user.ip_address": ["@ip:hash"]
1488 }
1489 }
1490 "#,
1491 )
1492 .unwrap();
1493
1494 let mut event = Annotated::new(Event {
1495 user: Annotated::new(User {
1496 id: Annotated::new("123".to_owned().into()),
1497 ip_address: Annotated::new(IpAddr("127.0.0.1".to_owned())),
1498 ..Default::default()
1499 }),
1500 ..Default::default()
1501 });
1502
1503 let mut processor = PiiProcessor::new(config.compiled());
1504 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
1505
1506 let user = event.value().unwrap().user.value().unwrap();
1507
1508 assert_eq!(
1510 user.ip_address.value().unwrap().as_str(),
1511 "AE12FE3B5F129B5CC4CDD2B136B7B7947C4D2741"
1512 );
1513
1514 assert_eq!(user.id.value().unwrap().as_str(), "123");
1515 }
1516
1517 #[test]
1518 fn test_replace_replaced_text() {
1519 let chunks = vec![Chunk::Redaction {
1520 text: "[ip]".into(),
1521 rule_id: "@ip".into(),
1522 ty: RemarkType::Substituted,
1523 }];
1524 let rule = RuleRef {
1525 id: "@ip:replace".into(),
1526 origin: "@ip".into(),
1527 ty: RuleType::Ip,
1528 redaction: Redaction::Replace(ReplaceRedaction {
1529 text: "[ip]".into(),
1530 }),
1531 };
1532 let res = apply_regex_to_chunks(
1533 chunks.clone(),
1534 &rule,
1535 &Regex::new(r#".*"#).unwrap(),
1536 ReplaceBehavior::Value,
1537 );
1538 assert_eq!(chunks, res);
1539 }
1540
1541 #[test]
1542 fn test_replace_replaced_text_anything() {
1543 let chunks = vec![Chunk::Redaction {
1544 text: "[Filtered]".into(),
1545 rule_id: "@password:filter".into(),
1546 ty: RemarkType::Substituted,
1547 }];
1548 let rule = RuleRef {
1549 id: "@anything:filter".into(),
1550 origin: "@anything:filter".into(),
1551 ty: RuleType::Anything,
1552 redaction: Redaction::Replace(ReplaceRedaction {
1553 text: "[Filtered]".into(),
1554 }),
1555 };
1556 let res = apply_regex_to_chunks(
1557 chunks.clone(),
1558 &rule,
1559 &Regex::new(r#".*"#).unwrap(),
1560 ReplaceBehavior::Groups(smallvec::smallvec![0]),
1561 );
1562 assert_eq!(chunks, res);
1563 }
1564
1565 #[test]
1566 fn test_trace_route_params_scrubbed() {
1567 let mut trace_context: Annotated<TraceContext> = Annotated::from_json(
1568 r#"
1569 {
1570 "type": "trace",
1571 "trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
1572 "span_id": "fa90fdead5f74052",
1573 "data": {
1574 "previousRoute": {
1575 "params": {
1576 "password": "test"
1577 }
1578 }
1579 }
1580 }
1581 "#,
1582 )
1583 .unwrap();
1584
1585 let ds_config = DataScrubbingConfig {
1586 scrub_data: true,
1587 scrub_defaults: true,
1588 ..Default::default()
1589 };
1590 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1591 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1592
1593 process_value(
1594 &mut trace_context,
1595 &mut pii_processor,
1596 ProcessingState::root(),
1597 )
1598 .unwrap();
1599 assert_annotated_snapshot!(trace_context);
1600 }
1601
1602 #[test]
1603 fn test_scrub_span_data_http_not_scrubbed() {
1604 let mut span: Annotated<Span> = Annotated::from_json(
1605 r#"{
1606 "data": {
1607 "http": {
1608 "query": "dance=true"
1609 }
1610 }
1611 }"#,
1612 )
1613 .unwrap();
1614
1615 let ds_config = DataScrubbingConfig {
1616 scrub_data: true,
1617 scrub_defaults: true,
1618 ..Default::default()
1619 };
1620 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1621 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1622
1623 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1624 assert_annotated_snapshot!(span);
1625 }
1626
1627 #[test]
1628 fn test_scrub_span_data_http_strings_are_scrubbed() {
1629 let mut span: Annotated<Span> = Annotated::from_json(
1630 r#"{
1631 "data": {
1632 "http": {
1633 "query": "ccnumber=5105105105105100&process_id=123",
1634 "fragment": "ccnumber=5105105105105100,process_id=123"
1635 }
1636 }
1637 }"#,
1638 )
1639 .unwrap();
1640
1641 let ds_config = DataScrubbingConfig {
1642 scrub_data: true,
1643 scrub_defaults: true,
1644 ..Default::default()
1645 };
1646 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1647 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1648
1649 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1650 assert_annotated_snapshot!(span);
1651 }
1652
1653 #[test]
1654 fn test_scrub_span_data_http_objects_are_scrubbed() {
1655 let mut span: Annotated<Span> = Annotated::from_json(
1656 r#"{
1657 "data": {
1658 "http": {
1659 "query": {
1660 "ccnumber": "5105105105105100",
1661 "process_id": "123"
1662 },
1663 "fragment": {
1664 "ccnumber": "5105105105105100",
1665 "process_id": "123"
1666 }
1667 }
1668 }
1669 }"#,
1670 )
1671 .unwrap();
1672
1673 let ds_config = DataScrubbingConfig {
1674 scrub_data: true,
1675 scrub_defaults: true,
1676 ..Default::default()
1677 };
1678 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1679 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1680
1681 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1682 assert_annotated_snapshot!(span);
1683 }
1684
1685 #[test]
1686 fn test_scrub_span_data_untyped_props_are_scrubbed() {
1687 let mut span: Annotated<Span> = Annotated::from_json(
1688 r#"{
1689 "data": {
1690 "untyped": "ccnumber=5105105105105100",
1691 "more_untyped": {
1692 "typed": "no",
1693 "scrubbed": "yes",
1694 "ccnumber": "5105105105105100"
1695 }
1696 }
1697 }"#,
1698 )
1699 .unwrap();
1700
1701 let ds_config = DataScrubbingConfig {
1702 scrub_data: true,
1703 scrub_defaults: true,
1704 ..Default::default()
1705 };
1706 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1707 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1708
1709 process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1710 assert_annotated_snapshot!(span);
1711 }
1712
1713 #[test]
1714 fn test_span_data_pii() {
1715 let mut span = Span::from_value(
1716 json!({
1717 "data": {
1718 "code.filepath": "src/sentry/api/authentication.py",
1719 }
1720 })
1721 .into(),
1722 );
1723
1724 let ds_config = DataScrubbingConfig {
1725 scrub_data: true,
1726 scrub_defaults: true,
1727 ..Default::default()
1728 };
1729 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1730
1731 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1732 processor::process_value(&mut span, &mut pii_processor, ProcessingState::root()).unwrap();
1733 assert_eq!(
1734 get_value!(span.data.code_filepath!).as_str(),
1735 Some("src/sentry/api/authentication.py")
1736 );
1737 }
1738
1739 #[test]
1740 fn test_csp_source_file_pii() {
1741 let mut event = Event::from_value(
1742 json!({
1743 "csp": {
1744 "source_file": "authentication.js",
1745 }
1746 })
1747 .into(),
1748 );
1749
1750 let config = serde_json::from_str::<PiiConfig>(
1751 r#"
1752 {
1753 "applications": {
1754 "csp.source_file": ["@anything:filter"]
1755 }
1756 }
1757 "#,
1758 )
1759 .unwrap();
1760
1761 let mut pii_processor = PiiProcessor::new(config.compiled());
1762 processor::process_value(&mut event, &mut pii_processor, ProcessingState::root()).unwrap();
1763 assert_eq!(get_value!(event.csp.source_file!).as_str(), "[Filtered]");
1764 }
1765
1766 #[test]
1767 fn test_scrub_breadcrumb_data_http_not_scrubbed() {
1768 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1769 r#"{
1770 "data": {
1771 "http": {
1772 "query": "dance=true"
1773 }
1774 }
1775 }"#,
1776 )
1777 .unwrap();
1778
1779 let ds_config = DataScrubbingConfig {
1780 scrub_data: true,
1781 scrub_defaults: true,
1782 ..Default::default()
1783 };
1784 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1785 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1786 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1787 assert_annotated_snapshot!(breadcrumb);
1788 }
1789
1790 #[test]
1791 fn test_scrub_breadcrumb_data_http_strings_are_scrubbed() {
1792 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1793 r#"{
1794 "data": {
1795 "http": {
1796 "query": "ccnumber=5105105105105100&process_id=123",
1797 "fragment": "ccnumber=5105105105105100,process_id=123"
1798 }
1799 }
1800 }"#,
1801 )
1802 .unwrap();
1803
1804 let ds_config = DataScrubbingConfig {
1805 scrub_data: true,
1806 scrub_defaults: true,
1807 ..Default::default()
1808 };
1809 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1810 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1811 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1812 assert_annotated_snapshot!(breadcrumb);
1813 }
1814
1815 #[test]
1816 fn test_scrub_breadcrumb_data_http_objects_are_scrubbed() {
1817 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1818 r#"{
1819 "data": {
1820 "http": {
1821 "query": {
1822 "ccnumber": "5105105105105100",
1823 "process_id": "123"
1824 },
1825 "fragment": {
1826 "ccnumber": "5105105105105100",
1827 "process_id": "123"
1828 }
1829 }
1830 }
1831 }"#,
1832 )
1833 .unwrap();
1834
1835 let ds_config = DataScrubbingConfig {
1836 scrub_data: true,
1837 scrub_defaults: true,
1838 ..Default::default()
1839 };
1840 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1841 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1842
1843 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1844 assert_annotated_snapshot!(breadcrumb);
1845 }
1846
1847 #[test]
1848 fn test_scrub_breadcrumb_data_untyped_props_are_scrubbed() {
1849 let mut breadcrumb: Annotated<Breadcrumb> = Annotated::from_json(
1850 r#"{
1851 "data": {
1852 "untyped": "ccnumber=5105105105105100",
1853 "more_untyped": {
1854 "typed": "no",
1855 "scrubbed": "yes",
1856 "ccnumber": "5105105105105100"
1857 }
1858 }
1859 }"#,
1860 )
1861 .unwrap();
1862
1863 let ds_config = DataScrubbingConfig {
1864 scrub_data: true,
1865 scrub_defaults: true,
1866 ..Default::default()
1867 };
1868 let pii_config = ds_config.pii_config().unwrap().as_ref().unwrap();
1869 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1870 process_value(&mut breadcrumb, &mut pii_processor, ProcessingState::root()).unwrap();
1871 assert_annotated_snapshot!(breadcrumb);
1872 }
1873
1874 #[test]
1875 fn test_scrub_graphql_response_data_with_variables() {
1876 let mut data = Event::from_value(
1877 json!({
1878 "request": {
1879 "data": {
1880 "query": "{\n viewer {\n login\n }\n}",
1881 "variables": {
1882 "login": "foo"
1883 }
1884 },
1885 "api_target": "graphql"
1886 },
1887 "contexts": {
1888 "response": {
1889 "type": "response",
1890 "data": {
1891 "data": {
1892 "viewer": {
1893 "login": "foo"
1894 }
1895 }
1896 }
1897 }
1898 }
1899 })
1900 .into(),
1901 );
1902
1903 scrub_graphql(data.value_mut().as_mut().unwrap());
1904
1905 assert_debug_snapshot!(&data);
1906 }
1907
1908 #[test]
1909 fn test_scrub_graphql_response_data_without_variables() {
1910 let mut data = Event::from_value(
1911 json!({
1912 "request": {
1913 "data": {
1914 "query": "{\n viewer {\n login\n }\n}"
1915 },
1916 "api_target": "graphql"
1917 },
1918 "contexts": {
1919 "response": {
1920 "type": "response",
1921 "data": {
1922 "data": {
1923 "viewer": {
1924 "login": "foo"
1925 }
1926 }
1927 }
1928 }
1929 }
1930 })
1931 .into(),
1932 );
1933
1934 scrub_graphql(data.value_mut().as_mut().unwrap());
1935 assert_debug_snapshot!(&data);
1936 }
1937
1938 #[test]
1939 fn test_does_not_scrub_if_no_graphql() {
1940 let mut data = Event::from_value(
1941 json!({
1942 "request": {
1943 "data": {
1944 "query": "{\n viewer {\n login\n }\n}",
1945 "variables": {
1946 "login": "foo"
1947 }
1948 },
1949 },
1950 "contexts": {
1951 "response": {
1952 "type": "response",
1953 "data": {
1954 "data": {
1955 "viewer": {
1956 "login": "foo"
1957 }
1958 }
1959 }
1960 }
1961 }
1962 })
1963 .into(),
1964 );
1965
1966 let scrubbing_config = DataScrubbingConfig {
1967 scrub_data: true,
1968 scrub_ip_addresses: true,
1969 scrub_defaults: true,
1970 ..Default::default()
1971 };
1972
1973 let pii_config = to_pii_config(&scrubbing_config).unwrap();
1974 let mut pii_processor = PiiProcessor::new(pii_config.compiled());
1975
1976 process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
1977
1978 assert_debug_snapshot!(&data);
1979 }
1980
1981 #[test]
1982 fn test_logentry_params_scrubbed() {
1983 let config = serde_json::from_str::<PiiConfig>(
1984 r##"
1985 {
1986 "applications": {
1987 "$string": ["@anything:remove"]
1988 }
1989 }
1990 "##,
1991 )
1992 .unwrap();
1993
1994 let mut event = Annotated::new(Event {
1995 logentry: Annotated::new(LogEntry {
1996 message: Annotated::new(Message::from("failed to parse report id=%s".to_owned())),
1997 formatted: Annotated::new("failed to parse report id=1".to_owned().into()),
1998 params: Annotated::new(Value::Array(vec![Annotated::new(Value::String(
1999 "12345".to_owned(),
2000 ))])),
2001 ..Default::default()
2002 }),
2003 ..Default::default()
2004 });
2005
2006 let mut processor = PiiProcessor::new(config.compiled());
2007 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2008
2009 let params = get_value!(event.logentry.params!);
2010 assert_debug_snapshot!(params, @r###"
2011 Array(
2012 [
2013 Meta {
2014 remarks: [
2015 Remark {
2016 ty: Removed,
2017 rule_id: "@anything:remove",
2018 range: None,
2019 },
2020 ],
2021 errors: [],
2022 original_length: None,
2023 original_value: None,
2024 },
2025 ],
2026 )
2027 "###);
2028 }
2029
2030 #[test]
2031 fn test_is_pairlist() {
2032 for (case, expected) in [
2033 (r#"[]"#, false),
2034 (r#"["foo"]"#, false),
2035 (r#"["foo", 123]"#, false),
2036 (r#"[[1, "foo"]]"#, false),
2037 (r#"[[["too_nested", 123]]]"#, false),
2038 (r#"[["foo", "bar"], [1, "foo"]]"#, false),
2039 (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false),
2040 (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false),
2041 (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false),
2042 (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false),
2043 (r#"[["foo", 123]]"#, true),
2044 (r#"[["foo", "bar"]]"#, true),
2045 (
2046 r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#,
2047 true,
2048 ),
2049 ] {
2050 let v = Annotated::<Value>::from_json(case).unwrap();
2051 let Annotated(Some(Value::Array(mut a)), _) = v else {
2052 panic!()
2053 };
2054 assert_eq!(is_pairlist(&mut a), expected, "{case}");
2055 }
2056 }
2057
2058 #[test]
2059 fn test_tuple_array_scrubbed_with_path_selector() {
2060 let configs = vec![
2062 r##"
2065 {
2066 "applications": {
2067 "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"]
2068 }
2069 }
2070 "##,
2071 r##"
2073 {
2074 "applications": {
2075 "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"]
2076 }
2077 }
2078 "##,
2079 ];
2080
2081 let mut event = Event::from_value(
2082 serde_json::json!(
2083 {
2084 "message": "hi",
2085 "exception": {
2086 "values": [
2087 {
2088 "type": "BrokenException",
2089 "value": "Something failed",
2090 "stacktrace": {
2091 "frames": [
2092 {
2093 "vars": {
2094 "headers": [
2095 ["authorization", "Bearer abc123"]
2096 ]
2097 }
2098 }
2099 ]
2100 }
2101 }
2102 ]
2103 }
2104 })
2105 .into(),
2106 );
2107
2108 for config in configs {
2109 let config = serde_json::from_str::<PiiConfig>(config).unwrap();
2110 let mut processor = PiiProcessor::new(config.compiled());
2111 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2112
2113 let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2114
2115 allow_duplicates!(assert_debug_snapshot!(vars, @r###"
2116 FrameVars(
2117 {
2118 "headers": Array(
2119 [
2120 Array(
2121 [
2122 String(
2123 "authorization",
2124 ),
2125 Annotated(
2126 String(
2127 "[Filtered]",
2128 ),
2129 Meta {
2130 remarks: [
2131 Remark {
2132 ty: Substituted,
2133 rule_id: "@anything:replace",
2134 range: Some(
2135 (
2136 0,
2137 10,
2138 ),
2139 ),
2140 },
2141 ],
2142 errors: [],
2143 original_length: Some(
2144 13,
2145 ),
2146 original_value: None,
2147 },
2148 ),
2149 ],
2150 ),
2151 ],
2152 ),
2153 },
2154 )
2155 "###));
2156 }
2157 }
2158
2159 #[test]
2160 fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() {
2161 let config = serde_json::from_str::<PiiConfig>(
2162 r##"
2163 {
2164 "applications": {
2165 "$string": ["@password:remove"]
2166 }
2167 }
2168 "##,
2169 )
2170 .unwrap();
2171
2172 let mut event = Event::from_value(
2173 serde_json::json!(
2174 {
2175 "message": "hi",
2176 "exception": {
2177 "values": [
2178 {
2179 "type": "BrokenException",
2180 "value": "Something failed",
2181 "stacktrace": {
2182 "frames": [
2183 {
2184 "vars": {
2185 "headers": [
2186 ["authorization", "abc123"]
2187 ]
2188 }
2189 }
2190 ]
2191 }
2192 }
2193 ]
2194 }
2195 })
2196 .into(),
2197 );
2198
2199 let mut processor = PiiProcessor::new(config.compiled());
2200 process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
2201
2202 let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap();
2203
2204 assert_debug_snapshot!(vars, @r###"
2205 FrameVars(
2206 {
2207 "headers": Array(
2208 [
2209 Array(
2210 [
2211 String(
2212 "authorization",
2213 ),
2214 Meta {
2215 remarks: [
2216 Remark {
2217 ty: Removed,
2218 rule_id: "@password:remove",
2219 range: None,
2220 },
2221 ],
2222 errors: [],
2223 original_length: None,
2224 original_value: None,
2225 },
2226 ],
2227 ),
2228 ],
2229 ),
2230 },
2231 )
2232 "###);
2233 }
2234}