1use std::borrow::Cow;
4use std::num::TryFromIntError;
5use std::ops::Range;
6use std::str::Utf8Error;
7
8use minidump::format::{
9 CvSignature, MINIDUMP_LOCATION_DESCRIPTOR, MINIDUMP_STREAM_TYPE as StreamType,
10};
11use minidump::{
12 Endian, Error as MinidumpError, Minidump, MinidumpMemoryList, MinidumpModuleList,
13 MinidumpThreadList,
14};
15use num_traits::FromPrimitive;
16use relay_event_schema::processor::{FieldAttrs, Pii, ValueType};
17use utf16string::{Utf16Error, WStr};
18
19use crate::{PiiAttachmentsProcessor, ScrubEncodings};
20
21#[derive(Debug, thiserror::Error)]
23pub enum ScrubMinidumpError {
24 #[error("failed to parse minidump")]
26 InvalidMinidump(#[from] MinidumpError),
27
28 #[error("invalid memory address")]
30 InvalidAddress,
31
32 #[error("minidump offsets out of usize range")]
34 OutOfRange,
35
36 #[error("string decoding error")]
38 Decoding,
39}
40
41impl From<TryFromIntError> for ScrubMinidumpError {
42 fn from(_source: TryFromIntError) -> Self {
43 Self::OutOfRange
44 }
45}
46
47impl From<Utf16Error> for ScrubMinidumpError {
48 fn from(_source: Utf16Error) -> Self {
49 Self::Decoding
50 }
51}
52
53impl From<Utf8Error> for ScrubMinidumpError {
54 fn from(_source: Utf8Error) -> Self {
55 Self::Decoding
56 }
57}
58
/// A region of the raw minidump that the scrubber wants to visit.
///
/// Every variant stores the byte range of the region within the raw minidump data rather
/// than a reference into it.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum MinidumpItem {
    /// A memory region whose descriptor RVA equals the stack RVA of one of the threads.
    StackMemory(Range<usize>),
    /// Any other region from the memory list.
    NonStackMemory(Range<usize>),
    /// The raw bytes of the `LinuxEnviron` stream.
    LinuxEnviron(Range<usize>),
    /// The raw bytes of the `LinuxCmdLine` stream.
    LinuxCmdLine(Range<usize>),
    /// The module name bytes that follow the `u32` length prefix at `module_name_rva`.
    ///
    /// The scrubber decodes this region as UTF-16LE.
    CodeModuleName(Range<usize>),
    /// The tail of a module's CodeView record, after its fixed-size header.
    ///
    /// The scrubber decodes this region as UTF-8.
    DebugModuleName(Range<usize>),
}
83
84struct MinidumpData<'a> {
86 data: &'a [u8],
87 minidump: Minidump<'a, &'a [u8]>,
88}
89
90impl<'a> MinidumpData<'a> {
91 fn parse(data: &'a [u8]) -> Result<Self, ScrubMinidumpError> {
96 let minidump = Minidump::read(data).map_err(ScrubMinidumpError::InvalidMinidump)?;
97 Ok(Self { data, minidump })
98 }
99
100 fn offset(&self, slice: &[u8]) -> Option<usize> {
106 let base = self.data.as_ptr() as usize;
107 let pointer = slice.as_ptr() as usize;
108
109 if pointer > base {
110 Some(pointer - base)
111 } else {
112 None
113 }
114 }
115
116 fn slice_range(&self, slice: &[u8]) -> Option<Range<usize>> {
118 let start = self.offset(slice)?;
119 let end = start + slice.len();
120 Some(start..end)
121 }
122
123 fn location_range(
127 &self,
128 location: MINIDUMP_LOCATION_DESCRIPTOR,
129 ) -> Result<Range<usize>, ScrubMinidumpError> {
130 let start: usize = location.rva.try_into()?;
131 let len: usize = location.data_size.try_into()?;
132 Ok(start..start + len)
133 }
134
135 fn raw_stream_range(
137 &self,
138 stream_type: StreamType,
139 ) -> Result<Option<Range<usize>>, ScrubMinidumpError> {
140 let range = match self.minidump.get_raw_stream(stream_type.into()) {
141 Ok(stream) => Some(
142 self.slice_range(stream)
143 .ok_or(ScrubMinidumpError::InvalidAddress)?,
144 ),
145 Err(MinidumpError::StreamNotFound) => None,
146 Err(e) => return Err(ScrubMinidumpError::InvalidMinidump(e)),
147 };
148 Ok(range)
149 }
150
151 fn items(&self) -> Result<Vec<MinidumpItem>, ScrubMinidumpError> {
153 let mut items = Vec::new();
154
155 let thread_list: MinidumpThreadList = self.minidump.get_stream()?;
156
157 let mem_list: MinidumpMemoryList = self.minidump.get_stream()?;
158 for mem in mem_list.iter() {
159 if thread_list
160 .threads
161 .iter()
162 .any(|t| t.raw.stack.memory.rva == mem.desc.memory.rva)
163 {
164 items.push(MinidumpItem::StackMemory(
165 self.location_range(mem.desc.memory)?,
166 ));
167 } else {
168 items.push(MinidumpItem::NonStackMemory(
169 self.location_range(mem.desc.memory)?,
170 ));
171 }
172 }
173
174 if let Some(range) = self.raw_stream_range(StreamType::LinuxEnviron)? {
175 items.push(MinidumpItem::LinuxEnviron(range));
176 }
177 if let Some(range) = self.raw_stream_range(StreamType::LinuxCmdLine)? {
178 items.push(MinidumpItem::LinuxCmdLine(range));
179 }
180
181 let mod_list: MinidumpModuleList = self.minidump.get_stream()?;
182 let mut rvas = Vec::new();
183 for module in mod_list.iter() {
184 let rva: usize = module.raw.module_name_rva.try_into()?;
185 if rvas.contains(&rva) {
186 continue;
187 } else {
188 rvas.push(rva);
189 }
190 let len_bytes = self
191 .data
192 .get(rva..)
193 .ok_or(ScrubMinidumpError::InvalidAddress)?;
194 let len: usize = u32_from_bytes(len_bytes, self.minidump.endian)?.try_into()?;
195 let start: usize = rva + 4;
196 items.push(MinidumpItem::CodeModuleName(start..start + len));
197
198 let codeview_loc = module.raw.cv_record;
200 let cv_start: usize = codeview_loc.rva.try_into()?;
201 let cv_len: usize = codeview_loc.data_size.try_into()?;
202 let signature_bytes = self
203 .data
204 .get(cv_start..)
205 .ok_or(ScrubMinidumpError::InvalidAddress)?;
206 let signature = u32_from_bytes(signature_bytes, self.minidump.endian)?;
207 match CvSignature::from_u32(signature) {
208 Some(CvSignature::Pdb70) => {
209 let offset: usize = 4 + (4 + 2 + 2 + 8) + 4; items.push(MinidumpItem::DebugModuleName(
211 (cv_start + offset)..(cv_start + cv_len),
212 ));
213 }
214 Some(CvSignature::Pdb20) => {
215 let offset: usize = 4 + 4 + 4 + 4; items.push(MinidumpItem::DebugModuleName(
217 (cv_start + offset)..(cv_start + cv_len),
218 ));
219 }
220 _ => {}
221 }
222 }
223
224 Ok(items)
225 }
226}
227
228fn u32_from_bytes(bytes: &[u8], endian: Endian) -> Result<u32, ScrubMinidumpError> {
233 let mut buf = [0u8; 4];
234 buf.copy_from_slice(bytes.get(..4).ok_or(ScrubMinidumpError::InvalidAddress)?);
235 match endian {
236 Endian::Little => Ok(u32::from_le_bytes(buf)),
237 Endian::Big => Ok(u32::from_be_bytes(buf)),
238 }
239}
240
241impl PiiAttachmentsProcessor<'_> {
242 pub fn scrub_minidump(
253 &self,
254 filename: &str,
255 data: &mut [u8],
256 ) -> Result<bool, ScrubMinidumpError> {
257 let file_state = self.state(filename, ValueType::Minidump);
258 let items = MinidumpData::parse(data)?.items()?;
259 let mut changed = false;
260
261 for item in items {
262 match item {
263 MinidumpItem::StackMemory(range) => {
264 let slice = data
267 .get_mut(range)
268 .ok_or(ScrubMinidumpError::InvalidAddress)?;
269
270 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::Maybe));
271 let state = file_state.enter_static(
272 "stack_memory",
273 Some(attrs),
274 ValueType::Binary | ValueType::StackMemory,
275 );
276 changed |= self.scrub_bytes(slice, &state, ScrubEncodings::All);
277 }
278 MinidumpItem::NonStackMemory(range) => {
279 let slice = data
280 .get_mut(range)
281 .ok_or(ScrubMinidumpError::InvalidAddress)?;
282 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
283 let state = file_state.enter_static(
284 "heap_memory",
285 Some(attrs),
286 ValueType::Binary | ValueType::HeapMemory,
287 );
288 changed |= self.scrub_bytes(slice, &state, ScrubEncodings::All);
289 }
290 MinidumpItem::LinuxEnviron(range) | MinidumpItem::LinuxCmdLine(range) => {
291 let slice = data
292 .get_mut(range)
293 .ok_or(ScrubMinidumpError::InvalidAddress)?;
294 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
295 let state = file_state.enter_static("", Some(attrs), Some(ValueType::Binary));
296 changed |= self.scrub_bytes(slice, &state, ScrubEncodings::All);
297 }
298 MinidumpItem::CodeModuleName(range) => {
299 let slice = data
300 .get_mut(range)
301 .ok_or(ScrubMinidumpError::InvalidAddress)?;
302 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
303 let state =
305 file_state.enter_static("code_file", Some(attrs), Some(ValueType::String));
306 let wstr = WStr::from_utf16le_mut(slice)?; changed |= self.scrub_utf16_filepath(wstr, &state);
308 }
309 MinidumpItem::DebugModuleName(range) => {
310 let slice = data
311 .get_mut(range)
312 .ok_or(ScrubMinidumpError::InvalidAddress)?;
313 let attrs = Cow::Owned(FieldAttrs::new().pii(Pii::True));
314 let state =
316 file_state.enter_static("debug_file", Some(attrs), Some(ValueType::String));
317 let s = std::str::from_utf8_mut(slice)?;
318 changed |= self.scrub_utf8_filepath(s, &state);
319 }
320 };
321 }
322
323 Ok(changed)
324 }
325}
326
327#[cfg(test)]
328mod tests {
329 use minidump::format::RVA;
330 use minidump::{MinidumpModule, Module};
331
332 use super::*;
333 use crate::config::PiiConfig;
334
335 struct TestScrubber {
336 orig_dump: Minidump<'static, &'static [u8]>,
337 _scrubbed_data: Vec<u8>,
338 scrubbed_dump: Minidump<'static, &'static [u8]>,
339 }
340
341 impl TestScrubber {
342 fn new(filename: &str, orig_data: &'static [u8], json: serde_json::Value) -> Self {
343 let orig_dump = Minidump::read(orig_data).expect("original minidump failed to parse");
344 let mut scrubbed_data = Vec::from(orig_data);
345
346 let config = serde_json::from_value::<PiiConfig>(json).expect("invalid config json");
347 let processor = PiiAttachmentsProcessor::new(config.compiled());
348 processor
349 .scrub_minidump(filename, scrubbed_data.as_mut_slice())
350 .expect("scrubbing failed");
351
352 let slice =
357 unsafe { std::mem::transmute::<&[u8], &'static [u8]>(scrubbed_data.as_slice()) };
358 let scrubbed_dump = Minidump::read(slice).expect("scrubbed minidump failed to parse");
359 Self {
360 orig_dump,
361 _scrubbed_data: scrubbed_data,
362 scrubbed_dump,
363 }
364 }
365 }
366
/// Selects which of the two dumps held by `TestScrubber` an accessor should read.
enum Which {
    /// The dump parsed from the unmodified fixture.
    Original,
    /// The dump parsed from the scrubbed copy.
    Scrubbed,
}
371
/// Selects which kind of memory region `TestScrubber::memory_regions` should return.
enum MemRegion {
    /// Regions whose RVA matches the stack RVA of a thread.
    Stack,
    /// All remaining regions of the memory list.
    Heap,
}
376
377 impl TestScrubber {
378 fn main_module(&self, which: Which) -> MinidumpModule {
379 let dump = match which {
380 Which::Original => &self.orig_dump,
381 Which::Scrubbed => &self.scrubbed_dump,
382 };
383 let modules: MinidumpModuleList = dump.get_stream().unwrap();
384 modules.main_module().unwrap().clone()
385 }
386
387 fn other_modules(&self, which: Which) -> Vec<MinidumpModule> {
388 let dump = match which {
389 Which::Original => &self.orig_dump,
390 Which::Scrubbed => &self.scrubbed_dump,
391 };
392 let modules: MinidumpModuleList = dump.get_stream().unwrap();
393 let mut iter = modules.iter();
394 iter.next(); iter.cloned().collect()
396 }
397
398 fn memory_regions<'slf>(&'slf self, which: Which, region: MemRegion) -> Vec<&'slf [u8]> {
400 let dump: &'slf Minidump<&'static [u8]> = match which {
401 Which::Original => &self.orig_dump,
402 Which::Scrubbed => &self.scrubbed_dump,
403 };
404
405 let thread_list: MinidumpThreadList = dump.get_stream().unwrap();
406 let stack_rvas: Vec<RVA> = thread_list
407 .threads
408 .iter()
409 .map(|t| t.raw.stack.memory.rva)
410 .collect();
411
412 let mem_list: MinidumpMemoryList<'slf> = dump.get_stream().unwrap();
418 mem_list
419 .iter()
420 .filter(|mem| match region {
421 MemRegion::Stack => stack_rvas.contains(&mem.desc.memory.rva),
422 MemRegion::Heap => !stack_rvas.contains(&mem.desc.memory.rva),
423 })
424 .map(|mem| unsafe { std::mem::transmute(mem.bytes) })
425 .collect()
426 }
427
428 fn stacks(&self, which: Which) -> Vec<&[u8]> {
430 self.memory_regions(which, MemRegion::Stack)
431 }
432
433 fn heaps(&self, which: Which) -> Vec<&[u8]> {
435 self.memory_regions(which, MemRegion::Heap)
436 }
437
438 fn environ(&self, which: Which) -> &[u8] {
442 let dump = match which {
443 Which::Original => &self.orig_dump,
444 Which::Scrubbed => &self.scrubbed_dump,
445 };
446 dump.get_raw_stream(StreamType::LinuxEnviron.into())
447 .unwrap()
448 }
449 }
450
/// Masks `debug_file` and the `code_file` of the `windows.dmp` attachment and checks that the
/// directory part of the module paths is replaced while the file names survive.
#[test]
fn test_module_list_removed_win() {
    let config = serde_json::json!(
        {
            "applications": {
                "debug_file": ["@anything:mask"],
                "$attachments.'windows.dmp'.code_file": ["@anything:mask"]
            }
        }
    );
    let scrubber = TestScrubber::new(
        "windows.dmp",
        include_bytes!("../../tests/fixtures/windows.dmp"),
        config,
    );

    // Sanity-check the fixture before looking at the scrubbed result.
    let original_main = scrubber.main_module(Which::Original);
    assert_eq!(
        original_main.code_file(),
        "C:\\projects\\breakpad-tools\\windows\\Release\\crash.exe"
    );
    assert_eq!(
        original_main.debug_file().unwrap(),
        "C:\\projects\\breakpad-tools\\windows\\Release\\crash.pdb"
    );

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(
        scrubbed_main.code_file(),
        "******************************************\\crash.exe"
    );
    assert_eq!(
        scrubbed_main.debug_file().unwrap(),
        "******************************************\\crash.pdb"
    );

    for original in scrubber.other_modules(Which::Original) {
        assert!(
            original.code_file().starts_with("C:\\Windows\\System32\\"),
            "code file without full path"
        );
        assert!(original.debug_file().unwrap().ends_with(".pdb"));
    }

    for scrubbed in scrubber.other_modules(Which::Scrubbed) {
        assert!(
            scrubbed.code_file().starts_with("*******************\\"),
            "code file path not scrubbed"
        );
        assert!(scrubbed.debug_file().unwrap().ends_with(".pdb"));
    }
}
504
/// Masks `debug_file` and every attachment's `code_file` on the Linux fixture and checks that
/// module paths are reduced to a masked directory plus the file name.
#[test]
fn test_module_list_removed_lin() {
    let config = serde_json::json!(
        {
            "applications": {
                "debug_file": ["@anything:mask"],
                "$attachments.*.code_file": ["@anything:mask"]
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    // Sanity-check the fixture before looking at the scrubbed result.
    let original_main = scrubber.main_module(Which::Original);
    assert_eq!(original_main.code_file(), "/work/linux/build/crash");
    assert_eq!(original_main.debug_file().unwrap(), "/work/linux/build/crash");

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(scrubbed_main.code_file(), "*****************/crash");
    assert_eq!(scrubbed_main.debug_file().unwrap(), "*****************/crash");

    // `linux-gate.so` is the one module name the assertions accept without a path.
    for original in scrubber.other_modules(Which::Original) {
        let code_file = original.code_file();
        assert!(
            code_file.matches('/').count() > 1 || code_file == "linux-gate.so",
            "code file does not contain path"
        );
        let debug_file = original.debug_file().unwrap();
        assert!(
            debug_file.matches('/').count() > 1 || debug_file == "linux-gate.so",
            "debug file does not contain a path"
        );
    }

    for scrubbed in scrubber.other_modules(Which::Scrubbed) {
        let code_file = scrubbed.code_file();
        assert!(
            code_file.matches('/').count() == 1 || code_file == "linux-gate.so",
            "code file not scrubbed"
        );
        let debug_file = scrubbed.debug_file().unwrap();
        assert!(
            debug_file.matches('/').count() == 1 || debug_file == "linux-gate.so",
            "scrubbed debug file contains a path"
        );
    }
}
556
/// Masks `debug_file` and every attachment's `code_file` on the macOS fixture. Code file paths
/// must be masked down to the file name, while the path-less debug file names stay unchanged.
#[test]
fn test_module_list_removed_mac() {
    let config = serde_json::json!(
        {
            "applications": {
                "debug_file": ["@anything:mask"],
                "$attachments.*.code_file": ["@anything:mask"]
            }
        }
    );
    let scrubber = TestScrubber::new(
        "macos.dmp",
        include_bytes!("../../tests/fixtures/macos.dmp"),
        config,
    );

    // Sanity-check the fixture before looking at the scrubbed result.
    let original_main = scrubber.main_module(Which::Original);
    assert_eq!(
        original_main.code_file(),
        "/Users/travis/build/getsentry/breakpad-tools/macos/build/./crash"
    );
    assert_eq!(original_main.debug_file().unwrap(), "crash");

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(
        scrubbed_main.code_file(),
        "**********************************************************/crash"
    );
    assert_eq!(scrubbed_main.debug_file().unwrap(), "crash");

    for original in scrubber.other_modules(Which::Original) {
        assert!(
            original.code_file().matches('/').count() > 1,
            "code file does not contain path"
        );
        assert!(
            original.debug_file().unwrap().matches('/').count() == 0,
            "debug file contains a path"
        );
    }

    for scrubbed in scrubber.other_modules(Which::Scrubbed) {
        assert!(
            scrubbed.code_file().matches('/').count() == 1,
            "code file not scrubbed"
        );
        assert!(
            scrubbed.debug_file().unwrap().matches('/').count() == 0,
            "scrubbed debug file contains a path"
        );
    }
}
610
/// Checks that the bare `$string` value-type selector reaches both module file names of the
/// main module in the Linux fixture.
#[test]
fn test_module_list_selectors() {
    let config = serde_json::json!(
        {
            "applications": {
                "$string": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(scrubbed_main.code_file(), "*****************/crash");
    assert_eq!(scrubbed_main.debug_file().unwrap(), "*****************/crash");
}
629
/// Checks that the `$stack_memory` selector on its own masks every byte of every stack region.
#[test]
fn test_stack_scrubbing_backwards_compatible_selector() {
    let config = serde_json::json!(
        {
            "applications": {
                "$stack_memory": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    for stack in scrubbed_stacks {
        assert!(stack.iter().all(|&byte| byte == b'*'));
    }
}
649
/// Checks that the path selector `$minidump.stack_memory` masks every byte of every stack
/// region.
#[test]
fn test_stack_scrubbing_path_item_selector() {
    let config = serde_json::json!(
        {
            "applications": {
                "$minidump.stack_memory": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    for stack in scrubbed_stacks {
        assert!(stack.iter().all(|&byte| byte == b'*'));
    }
}
667
/// Masks with the `$minidump.$binary` selector and asserts that every stack byte became `*`.
///
/// The test is marked `#[should_panic]`, so it passes only while something in its body
/// panics — presumably the stack assertion, meaning this selector does not currently scrub
/// stack memory. TODO confirm whether that is a known limitation or intended behaviour.
#[test]
#[should_panic]
fn test_stack_scrubbing_valuetype_selector() {
    let config = serde_json::json!(
        {
            "applications": {
                "$minidump.$binary": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    for stack in scrubbed_stacks {
        assert!(stack.iter().all(|&byte| byte == b'*'));
    }
}
688
/// Checks that the bare `$binary` selector leaves stack memory untouched: every scrubbed
/// stack region must equal its original counterpart.
#[test]
fn test_stack_scrubbing_valuetype_not_fully_qualified() {
    let config = serde_json::json!(
        {
            "applications": {
                "$binary": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    let original_stacks = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed_stacks.iter().zip(original_stacks.iter()) {
        assert_eq!(after, before);
    }
}
711
/// Masks with the `$minidump.*` wildcard selector and asserts that stack memory is unchanged.
///
/// The test is marked `#[should_panic]`, so it passes only while something in its body
/// panics — presumably the equality assertion, meaning the wildcard does currently scrub
/// stack memory. TODO confirm whether that is a known limitation or intended behaviour.
#[test]
#[should_panic]
fn test_stack_scrubbing_wildcard() {
    let config = serde_json::json!(
        {
            "applications": {
                "$minidump.*": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    let original_stacks = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed_stacks.iter().zip(original_stacks.iter()) {
        assert_eq!(after, before);
    }
}
736
/// Checks that the deep wildcard selector `$attachments.**` leaves stack memory untouched:
/// every scrubbed stack region must equal its original counterpart.
#[test]
fn test_stack_scrubbing_deep_wildcard() {
    let config = serde_json::json!(
        {
            "applications": {
                "$attachments.**": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    let original_stacks = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed_stacks.iter().zip(original_stacks.iter()) {
        assert_eq!(after, before);
    }
}
759
/// Checks that the selector `$binary && !stack_memory` leaves stack regions unchanged while
/// masking every byte of the non-stack memory regions.
#[test]
fn test_stack_scrubbing_binary_not_stack() {
    let config = serde_json::json!(
        {
            "applications": {
                "$binary && !stack_memory": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_stacks = scrubber.stacks(Which::Scrubbed);
    let original_stacks = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed_stacks.iter().zip(original_stacks.iter()) {
        assert_eq!(after, before);
    }

    let scrubbed_heaps = scrubber.heaps(Which::Scrubbed);
    for heap in scrubbed_heaps {
        assert!(heap.iter().all(|&byte| byte == b'*'));
    }
}
784
/// Checks that the bare `$binary` selector masks every byte of the Linux environ stream.
#[test]
fn test_linux_environ_valuetype() {
    let config = serde_json::json!(
        {
            "applications": {
                "$binary": ["@anything:mask"],
            }
        }
    );
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_environ = scrubber.environ(Which::Scrubbed);
    assert!(scrubbed_environ.iter().all(|&byte| byte == b'*'));
}
802}