1use std::borrow::Cow;
4use std::num::TryFromIntError;
5use std::ops::Range;
6use std::str::Utf8Error;
7
8use minidump::format::{
9 CvSignature, MINIDUMP_LOCATION_DESCRIPTOR, MINIDUMP_STREAM_TYPE as StreamType,
10};
11use minidump::{
12 Endian, Error as MinidumpError, Minidump, MinidumpMemoryList, MinidumpModuleList,
13 MinidumpThreadList,
14};
15use num_traits::FromPrimitive;
16use relay_event_schema::processor::{FieldAttrs, Pii, ValueType};
17use utf16string::{Utf16Error, WStr};
18
19use crate::{PiiAttachmentsProcessor, ScrubEncodings};
20
/// Errors that can occur while locating or scrubbing data inside a minidump.
#[derive(Debug, thiserror::Error)]
pub enum ScrubMinidumpError {
    /// The buffer could not be parsed as a minidump; wraps the parser's own error.
    #[error("failed to parse minidump")]
    InvalidMinidump(#[from] MinidumpError),

    /// A location derived from the minidump does not resolve to bytes inside the buffer.
    #[error("invalid memory address")]
    InvalidAddress,

    /// An integer read from the minidump could not be converted (see the `TryFromIntError`
    /// conversion in this file).
    #[error("minidump offsets out of usize range")]
    OutOfRange,

    /// A string inside the minidump was not valid UTF-16 or UTF-8.
    #[error("string decoding error")]
    Decoding,
}
40
41impl From<TryFromIntError> for ScrubMinidumpError {
42 fn from(_source: TryFromIntError) -> Self {
43 Self::OutOfRange
44 }
45}
46
47impl From<Utf16Error> for ScrubMinidumpError {
48 fn from(_source: Utf16Error) -> Self {
49 Self::Decoding
50 }
51}
52
53impl From<Utf8Error> for ScrubMinidumpError {
54 fn from(_source: Utf8Error) -> Self {
55 Self::Decoding
56 }
57}
58
/// A byte range inside the raw minidump buffer, tagged with the kind of data it holds.
///
/// All ranges are offsets into the buffer the minidump was parsed from.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum MinidumpItem {
    /// A memory region whose RVA matches the stack memory RVA of some thread.
    StackMemory(Range<usize>),
    /// A memory region from the memory list that no thread's stack descriptor points at.
    NonStackMemory(Range<usize>),
    /// The raw bytes of the `LinuxEnviron` stream.
    LinuxEnviron(Range<usize>),
    /// The raw bytes of the `LinuxCmdLine` stream.
    LinuxCmdLine(Range<usize>),
    /// The bytes of a module name, located after the 4-byte length found at the module's
    /// name RVA. Scrubbed as UTF-16LE text.
    CodeModuleName(Range<usize>),
    /// The tail of a module's CodeView record after its fixed-size header.
    /// Scrubbed as UTF-8 text.
    DebugModuleName(Range<usize>),
}
83
/// A parsed minidump together with the raw buffer it was parsed from.
struct MinidumpData<'a> {
    /// The raw minidump bytes; all offsets and ranges are relative to this buffer.
    data: &'a [u8],
    /// The parsed view over `data`.
    minidump: Minidump<'a, &'a [u8]>,
}
89
90impl<'a> MinidumpData<'a> {
91 fn parse(data: &'a [u8]) -> Result<Self, ScrubMinidumpError> {
96 let minidump = Minidump::read(data).map_err(ScrubMinidumpError::InvalidMinidump)?;
97 Ok(Self { data, minidump })
98 }
99
100 fn offset(&self, slice: &[u8]) -> Option<usize> {
106 let base = self.data.as_ptr() as usize;
107 let pointer = slice.as_ptr() as usize;
108
109 if pointer > base {
110 Some(pointer - base)
111 } else {
112 None
113 }
114 }
115
116 fn slice_range(&self, slice: &[u8]) -> Option<Range<usize>> {
118 let start = self.offset(slice)?;
119 let end = start + slice.len();
120 Some(start..end)
121 }
122
123 fn location_range(
127 &self,
128 location: MINIDUMP_LOCATION_DESCRIPTOR,
129 ) -> Result<Range<usize>, ScrubMinidumpError> {
130 let start: usize = location.rva.try_into()?;
131 let len: usize = location.data_size.try_into()?;
132 Ok(start..start + len)
133 }
134
135 fn raw_stream_range(
137 &self,
138 stream_type: StreamType,
139 ) -> Result<Option<Range<usize>>, ScrubMinidumpError> {
140 let range = match self.minidump.get_raw_stream(stream_type.into()) {
141 Ok(stream) => Some(
142 self.slice_range(stream)
143 .ok_or(ScrubMinidumpError::InvalidAddress)?,
144 ),
145 Err(MinidumpError::StreamNotFound) => None,
146 Err(e) => return Err(ScrubMinidumpError::InvalidMinidump(e)),
147 };
148 Ok(range)
149 }
150
151 fn items(&self) -> Result<Vec<MinidumpItem>, ScrubMinidumpError> {
153 let mut items = Vec::new();
154
155 let thread_list: MinidumpThreadList = self.minidump.get_stream()?;
156
157 let mem_list: MinidumpMemoryList = self.minidump.get_stream()?;
158 for mem in mem_list.iter() {
159 if thread_list
160 .threads
161 .iter()
162 .any(|t| t.raw.stack.memory.rva == mem.desc.memory.rva)
163 {
164 items.push(MinidumpItem::StackMemory(
165 self.location_range(mem.desc.memory)?,
166 ));
167 } else {
168 items.push(MinidumpItem::NonStackMemory(
169 self.location_range(mem.desc.memory)?,
170 ));
171 }
172 }
173
174 if let Some(range) = self.raw_stream_range(StreamType::LinuxEnviron)? {
175 items.push(MinidumpItem::LinuxEnviron(range));
176 }
177 if let Some(range) = self.raw_stream_range(StreamType::LinuxCmdLine)? {
178 items.push(MinidumpItem::LinuxCmdLine(range));
179 }
180
181 let mod_list: MinidumpModuleList = self.minidump.get_stream()?;
182 let mut rvas = Vec::new();
183 for module in mod_list.iter() {
184 let rva: usize = module.raw.module_name_rva.try_into()?;
185 if rvas.contains(&rva) {
186 continue;
187 } else {
188 rvas.push(rva);
189 }
190 let len_bytes = self
191 .data
192 .get(rva..)
193 .ok_or(ScrubMinidumpError::InvalidAddress)?;
194 let len: usize = u32_from_bytes(len_bytes, self.minidump.endian)?.try_into()?;
195 let start: usize = rva + 4;
196 items.push(MinidumpItem::CodeModuleName(start..start + len));
197
198 let codeview_loc = module.raw.cv_record;
200 let cv_start: usize = codeview_loc.rva.try_into()?;
201 let cv_len: usize = codeview_loc.data_size.try_into()?;
202 let signature_bytes = self
203 .data
204 .get(cv_start..)
205 .ok_or(ScrubMinidumpError::InvalidAddress)?;
206 let signature = u32_from_bytes(signature_bytes, self.minidump.endian)?;
207 match CvSignature::from_u32(signature) {
208 Some(CvSignature::Pdb70) => {
209 let offset: usize = 4 + (4 + 2 + 2 + 8) + 4; items.push(MinidumpItem::DebugModuleName(
211 (cv_start + offset)..(cv_start + cv_len),
212 ));
213 }
214 Some(CvSignature::Pdb20) => {
215 let offset: usize = 4 + 4 + 4 + 4; items.push(MinidumpItem::DebugModuleName(
217 (cv_start + offset)..(cv_start + cv_len),
218 ));
219 }
220 _ => {}
221 }
222 }
223
224 Ok(items)
225 }
226}
227
228fn u32_from_bytes(bytes: &[u8], endian: Endian) -> Result<u32, ScrubMinidumpError> {
233 let mut buf = [0u8; 4];
234 buf.copy_from_slice(bytes.get(..4).ok_or(ScrubMinidumpError::InvalidAddress)?);
235 match endian {
236 Endian::Little => Ok(u32::from_le_bytes(buf)),
237 Endian::Big => Ok(u32::from_be_bytes(buf)),
238 }
239}
240
241impl PiiAttachmentsProcessor<'_> {
242 pub fn scrub_minidump(
253 &self,
254 filename: &str,
255 data: &mut [u8],
256 ) -> Result<bool, ScrubMinidumpError> {
257 let file_state = self.state(filename, ValueType::Minidump);
258 let items = MinidumpData::parse(data)?.items()?;
259 let mut changed = false;
260
261 for item in items {
262 match item {
263 MinidumpItem::StackMemory(range) => {
264 let slice = data
267 .get_mut(range)
268 .ok_or(ScrubMinidumpError::InvalidAddress)?;
269
270 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::Maybe));
271 let state = file_state.enter_borrowed(
272 "stack_memory",
273 Some(attrs),
274 ValueType::Binary | ValueType::StackMemory,
275 );
276 changed |= self.scrub_bytes(slice, &state, ScrubEncodings::All);
277 }
278 MinidumpItem::NonStackMemory(range) => {
279 let slice = data
280 .get_mut(range)
281 .ok_or(ScrubMinidumpError::InvalidAddress)?;
282 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
283 let state = file_state.enter_borrowed(
284 "heap_memory",
285 Some(attrs),
286 ValueType::Binary | ValueType::HeapMemory,
287 );
288 changed |= self.scrub_bytes(slice, &state, ScrubEncodings::All);
289 }
290 MinidumpItem::LinuxEnviron(range) | MinidumpItem::LinuxCmdLine(range) => {
291 let slice = data
292 .get_mut(range)
293 .ok_or(ScrubMinidumpError::InvalidAddress)?;
294 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
295 let state = file_state.enter_borrowed("", Some(attrs), Some(ValueType::Binary));
296 changed |= self.scrub_bytes(slice, &state, ScrubEncodings::All);
297 }
298 MinidumpItem::CodeModuleName(range) => {
299 let slice = data
300 .get_mut(range)
301 .ok_or(ScrubMinidumpError::InvalidAddress)?;
302 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
303 let state = file_state.enter_borrowed(
305 "code_file",
306 Some(attrs),
307 Some(ValueType::String),
308 );
309 let wstr = WStr::from_utf16le_mut(slice)?; changed |= self.scrub_utf16_filepath(wstr, &state);
311 }
312 MinidumpItem::DebugModuleName(range) => {
313 let slice = data
314 .get_mut(range)
315 .ok_or(ScrubMinidumpError::InvalidAddress)?;
316 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
317 let state = file_state.enter_borrowed(
319 "debug_file",
320 Some(attrs),
321 Some(ValueType::String),
322 );
323 let s = std::str::from_utf8_mut(slice)?;
324 changed |= self.scrub_utf8_filepath(s, &state);
325 }
326 };
327 }
328
329 Ok(changed)
330 }
331}
332
333#[cfg(test)]
334mod tests {
335 use minidump::format::RVA;
336 use minidump::{MinidumpModule, Module};
337
338 use super::*;
339 use crate::config::PiiConfig;
340
341 struct TestScrubber {
342 orig_dump: Minidump<'static, &'static [u8]>,
343 _scrubbed_data: Vec<u8>,
344 scrubbed_dump: Minidump<'static, &'static [u8]>,
345 }
346
347 impl TestScrubber {
348 fn new(filename: &str, orig_data: &'static [u8], json: serde_json::Value) -> Self {
349 let orig_dump = Minidump::read(orig_data).expect("original minidump failed to parse");
350 let mut scrubbed_data = Vec::from(orig_data);
351
352 let config = serde_json::from_value::<PiiConfig>(json).expect("invalid config json");
353 let processor = PiiAttachmentsProcessor::new(config.compiled());
354 processor
355 .scrub_minidump(filename, scrubbed_data.as_mut_slice())
356 .expect("scrubbing failed");
357
358 let slice =
363 unsafe { std::mem::transmute::<&[u8], &'static [u8]>(scrubbed_data.as_slice()) };
364 let scrubbed_dump = Minidump::read(slice).expect("scrubbed minidump failed to parse");
365 Self {
366 orig_dump,
367 _scrubbed_data: scrubbed_data,
368 scrubbed_dump,
369 }
370 }
371 }
372
    /// Selects which of the two parsed minidumps held by `TestScrubber` to inspect.
    enum Which {
        /// The minidump parsed from the original fixture bytes.
        Original,
        /// The minidump parsed from the scrubbed copy.
        Scrubbed,
    }
377
    /// Classifies memory regions by whether a thread's stack descriptor points at them.
    enum MemRegion {
        /// Regions whose RVA equals the stack memory RVA of some thread.
        Stack,
        /// All other regions of the memory list.
        Heap,
    }
382
383 impl TestScrubber {
384 fn main_module(&self, which: Which) -> MinidumpModule {
385 let dump = match which {
386 Which::Original => &self.orig_dump,
387 Which::Scrubbed => &self.scrubbed_dump,
388 };
389 let modules: MinidumpModuleList = dump.get_stream().unwrap();
390 modules.main_module().unwrap().clone()
391 }
392
393 fn other_modules(&self, which: Which) -> Vec<MinidumpModule> {
394 let dump = match which {
395 Which::Original => &self.orig_dump,
396 Which::Scrubbed => &self.scrubbed_dump,
397 };
398 let modules: MinidumpModuleList = dump.get_stream().unwrap();
399 let mut iter = modules.iter();
400 iter.next(); iter.cloned().collect()
402 }
403
404 fn memory_regions<'slf>(&'slf self, which: Which, region: MemRegion) -> Vec<&'slf [u8]> {
406 let dump: &'slf Minidump<&'static [u8]> = match which {
407 Which::Original => &self.orig_dump,
408 Which::Scrubbed => &self.scrubbed_dump,
409 };
410
411 let thread_list: MinidumpThreadList = dump.get_stream().unwrap();
412 let stack_rvas: Vec<RVA> = thread_list
413 .threads
414 .iter()
415 .map(|t| t.raw.stack.memory.rva)
416 .collect();
417
418 let mem_list: MinidumpMemoryList<'slf> = dump.get_stream().unwrap();
424 mem_list
425 .iter()
426 .filter(|mem| match region {
427 MemRegion::Stack => stack_rvas.contains(&mem.desc.memory.rva),
428 MemRegion::Heap => !stack_rvas.contains(&mem.desc.memory.rva),
429 })
430 .map(|mem| unsafe { std::mem::transmute(mem.bytes) })
431 .collect()
432 }
433
434 fn stacks(&self, which: Which) -> Vec<&[u8]> {
436 self.memory_regions(which, MemRegion::Stack)
437 }
438
439 fn heaps(&self, which: Which) -> Vec<&[u8]> {
441 self.memory_regions(which, MemRegion::Heap)
442 }
443
444 fn environ(&self, which: Which) -> &[u8] {
448 let dump = match which {
449 Which::Original => &self.orig_dump,
450 Which::Scrubbed => &self.scrubbed_dump,
451 };
452 dump.get_raw_stream(StreamType::LinuxEnviron.into())
453 .unwrap()
454 }
455 }
456
457 #[test]
458 fn test_module_list_removed_win() {
459 let scrubber = TestScrubber::new(
460 "windows.dmp",
461 include_bytes!("../../tests/fixtures/windows.dmp"),
462 serde_json::json!(
463 {
464 "applications": {
465 "debug_file": ["@anything:mask"],
466 "$attachments.'windows.dmp'.code_file": ["@anything:mask"]
467 }
468 }
469 ),
470 );
471
472 let main = scrubber.main_module(Which::Original);
473 assert_eq!(
474 main.code_file(),
475 "C:\\projects\\breakpad-tools\\windows\\Release\\crash.exe"
476 );
477 assert_eq!(
478 main.debug_file().unwrap(),
479 "C:\\projects\\breakpad-tools\\windows\\Release\\crash.pdb"
480 );
481
482 let main = scrubber.main_module(Which::Scrubbed);
483 assert_eq!(
484 main.code_file(),
485 "******************************************\\crash.exe"
486 );
487 assert_eq!(
488 main.debug_file().unwrap(),
489 "******************************************\\crash.pdb"
490 );
491
492 let modules = scrubber.other_modules(Which::Original);
493 for module in modules {
494 assert!(
495 module.code_file().starts_with("C:\\Windows\\System32\\"),
496 "code file without full path"
497 );
498 assert!(module.debug_file().unwrap().ends_with(".pdb"));
499 }
500
501 let modules = scrubber.other_modules(Which::Scrubbed);
502 for module in modules {
503 assert!(
504 module.code_file().starts_with("*******************\\"),
505 "code file path not scrubbed"
506 );
507 assert!(module.debug_file().unwrap().ends_with(".pdb"));
508 }
509 }
510
511 #[test]
512 fn test_module_list_removed_lin() {
513 let scrubber = TestScrubber::new(
514 "linux.dmp",
515 include_bytes!("../../tests/fixtures/linux.dmp"),
516 serde_json::json!(
517 {
518 "applications": {
519 "debug_file": ["@anything:mask"],
520 "$attachments.*.code_file": ["@anything:mask"]
521 }
522 }
523 ),
524 );
525
526 let main = scrubber.main_module(Which::Original);
527 assert_eq!(main.code_file(), "/work/linux/build/crash");
528 assert_eq!(main.debug_file().unwrap(), "/work/linux/build/crash");
529
530 let main = scrubber.main_module(Which::Scrubbed);
531 assert_eq!(main.code_file(), "*****************/crash");
532 assert_eq!(main.debug_file().unwrap(), "*****************/crash");
533
534 let modules = scrubber.other_modules(Which::Original);
535 for module in modules {
536 assert!(
537 module.code_file().matches('/').count() > 1
538 || module.code_file() == "linux-gate.so",
539 "code file does not contain path"
540 );
541 assert!(
542 module.debug_file().unwrap().matches('/').count() > 1
543 || module.debug_file().unwrap() == "linux-gate.so",
544 "debug file does not contain a path"
545 );
546 }
547
548 let modules = scrubber.other_modules(Which::Scrubbed);
549 for module in modules {
550 assert!(
551 module.code_file().matches('/').count() == 1
552 || module.code_file() == "linux-gate.so",
553 "code file not scrubbed"
554 );
555 assert!(
556 module.debug_file().unwrap().matches('/').count() == 1
557 || module.debug_file().unwrap() == "linux-gate.so",
558 "scrubbed debug file contains a path"
559 );
560 }
561 }
562
563 #[test]
564 fn test_module_list_removed_mac() {
565 let scrubber = TestScrubber::new(
566 "macos.dmp",
567 include_bytes!("../../tests/fixtures/macos.dmp"),
568 serde_json::json!(
569 {
570 "applications": {
571 "debug_file": ["@anything:mask"],
572 "$attachments.*.code_file": ["@anything:mask"]
573 }
574 }
575 ),
576 );
577
578 let main = scrubber.main_module(Which::Original);
579 assert_eq!(
580 main.code_file(),
581 "/Users/travis/build/getsentry/breakpad-tools/macos/build/./crash"
582 );
583 assert_eq!(main.debug_file().unwrap(), "crash");
584
585 let main = scrubber.main_module(Which::Scrubbed);
586 assert_eq!(
587 main.code_file(),
588 "**********************************************************/crash"
589 );
590 assert_eq!(main.debug_file().unwrap(), "crash");
591
592 let modules = scrubber.other_modules(Which::Original);
593 for module in modules {
594 assert!(
595 module.code_file().matches('/').count() > 1,
596 "code file does not contain path"
597 );
598 assert!(
599 module.debug_file().unwrap().matches('/').count() == 0,
600 "debug file contains a path"
601 );
602 }
603
604 let modules = scrubber.other_modules(Which::Scrubbed);
605 for module in modules {
606 assert!(
607 module.code_file().matches('/').count() == 1,
608 "code file not scrubbed"
609 );
610 assert!(
611 module.debug_file().unwrap().matches('/').count() == 0,
612 "scrubbed debug file contains a path"
613 );
614 }
615 }
616
617 #[test]
618 fn test_module_list_selectors() {
619 let scrubber = TestScrubber::new(
621 "linux.dmp",
622 include_bytes!("../../tests/fixtures/linux.dmp"),
623 serde_json::json!(
624 {
625 "applications": {
626 "$string": ["@anything:mask"],
627 }
628 }
629 ),
630 );
631 let main = scrubber.main_module(Which::Scrubbed);
632 assert_eq!(main.code_file(), "*****************/crash");
633 assert_eq!(main.debug_file().unwrap(), "*****************/crash");
634 }
635
636 #[test]
637 fn test_stack_scrubbing_backwards_compatible_selector() {
638 let scrubber = TestScrubber::new(
641 "linux.dmp",
642 include_bytes!("../../tests/fixtures/linux.dmp"),
643 serde_json::json!(
644 {
645 "applications": {
646 "$stack_memory": ["@anything:mask"],
647 }
648 }
649 ),
650 );
651 for stack in scrubber.stacks(Which::Scrubbed) {
652 assert!(stack.iter().all(|b| *b == b'*'));
653 }
654 }
655
656 #[test]
657 fn test_stack_scrubbing_path_item_selector() {
658 let scrubber = TestScrubber::new(
659 "linux.dmp",
660 include_bytes!("../../tests/fixtures/linux.dmp"),
661 serde_json::json!(
662 {
663 "applications": {
664 "$minidump.stack_memory": ["@anything:mask"],
665 }
666 }
667 ),
668 );
669 for stack in scrubber.stacks(Which::Scrubbed) {
670 assert!(stack.iter().all(|b| *b == b'*'));
671 }
672 }
673
674 #[test]
675 #[should_panic]
676 fn test_stack_scrubbing_valuetype_selector() {
677 let scrubber = TestScrubber::new(
680 "linux.dmp",
681 include_bytes!("../../tests/fixtures/linux.dmp"),
682 serde_json::json!(
683 {
684 "applications": {
685 "$minidump.$binary": ["@anything:mask"],
686 }
687 }
688 ),
689 );
690 for stack in scrubber.stacks(Which::Scrubbed) {
691 assert!(stack.iter().all(|b| *b == b'*'));
692 }
693 }
694
695 #[test]
696 fn test_stack_scrubbing_valuetype_not_fully_qualified() {
697 let scrubber = TestScrubber::new(
699 "linux.dmp",
700 include_bytes!("../../tests/fixtures/linux.dmp"),
701 serde_json::json!(
702 {
703 "applications": {
704 "$binary": ["@anything:mask"],
705 }
706 }
707 ),
708 );
709 for (scrubbed_stack, original_stack) in scrubber
710 .stacks(Which::Scrubbed)
711 .iter()
712 .zip(scrubber.stacks(Which::Original).iter())
713 {
714 assert_eq!(scrubbed_stack, original_stack);
715 }
716 }
717
718 #[test]
719 #[should_panic]
720 fn test_stack_scrubbing_wildcard() {
721 let scrubber = TestScrubber::new(
724 "linux.dmp",
725 include_bytes!("../../tests/fixtures/linux.dmp"),
726 serde_json::json!(
727 {
728 "applications": {
729 "$minidump.*": ["@anything:mask"],
730 }
731 }
732 ),
733 );
734 for (scrubbed_stack, original_stack) in scrubber
735 .stacks(Which::Scrubbed)
736 .iter()
737 .zip(scrubber.stacks(Which::Original).iter())
738 {
739 assert_eq!(scrubbed_stack, original_stack);
740 }
741 }
742
743 #[test]
744 fn test_stack_scrubbing_deep_wildcard() {
745 let scrubber = TestScrubber::new(
747 "linux.dmp",
748 include_bytes!("../../tests/fixtures/linux.dmp"),
749 serde_json::json!(
750 {
751 "applications": {
752 "$attachments.**": ["@anything:mask"],
753 }
754 }
755 ),
756 );
757 for (scrubbed_stack, original_stack) in scrubber
758 .stacks(Which::Scrubbed)
759 .iter()
760 .zip(scrubber.stacks(Which::Original).iter())
761 {
762 assert_eq!(scrubbed_stack, original_stack);
763 }
764 }
765
766 #[test]
767 fn test_stack_scrubbing_binary_not_stack() {
768 let scrubber = TestScrubber::new(
769 "linux.dmp",
770 include_bytes!("../../tests/fixtures/linux.dmp"),
771 serde_json::json!(
772 {
773 "applications": {
774 "$binary && !stack_memory": ["@anything:mask"],
775 }
776 }
777 ),
778 );
779 for (scrubbed_stack, original_stack) in scrubber
780 .stacks(Which::Scrubbed)
781 .iter()
782 .zip(scrubber.stacks(Which::Original).iter())
783 {
784 assert_eq!(scrubbed_stack, original_stack);
785 }
786 for heap in scrubber.heaps(Which::Scrubbed) {
787 assert!(heap.iter().all(|b| *b == b'*'));
788 }
789 }
790
791 #[test]
792 fn test_linux_environ_valuetype() {
793 let scrubber = TestScrubber::new(
795 "linux.dmp",
796 include_bytes!("../../tests/fixtures/linux.dmp"),
797 serde_json::json!(
798 {
799 "applications": {
800 "$binary": ["@anything:mask"],
801 }
802 }
803 ),
804 );
805 let environ = scrubber.environ(Which::Scrubbed);
806 assert!(environ.iter().all(|b| *b == b'*'));
807 }
808}