use std::borrow::Cow;
use std::num::TryFromIntError;
use std::ops::Range;
use std::str::Utf8Error;
use minidump::format::{
CvSignature, MINIDUMP_LOCATION_DESCRIPTOR, MINIDUMP_STREAM_TYPE as StreamType,
};
use minidump::{
Endian, Error as MinidumpError, Minidump, MinidumpMemoryList, MinidumpModuleList,
MinidumpThreadList,
};
use num_traits::FromPrimitive;
use relay_event_schema::processor::{FieldAttrs, Pii, ValueType};
use utf16string::{Utf16Error, WStr};
use crate::{PiiAttachmentsProcessor, ScrubEncodings};
/// Errors that can occur while locating or scrubbing data inside a minidump.
#[derive(Debug, thiserror::Error)]
pub enum ScrubMinidumpError {
    /// The minidump parser reported an error; the parser's error is kept as the source.
    #[error("failed to parse minidump")]
    InvalidMinidump(#[from] MinidumpError),
    /// A byte range or offset derived from the minidump does not lie within its data.
    #[error("invalid memory address")]
    InvalidAddress,
    /// An offset or size stored in the minidump could not be represented as `usize`.
    #[error("minidump offsets out of usize range")]
    OutOfRange,
    /// A UTF-16 or UTF-8 string inside the minidump could not be decoded.
    #[error("string decoding error")]
    Decoding,
}
impl From<TryFromIntError> for ScrubMinidumpError {
fn from(_source: TryFromIntError) -> Self {
Self::OutOfRange
}
}
impl From<Utf16Error> for ScrubMinidumpError {
fn from(_source: Utf16Error) -> Self {
Self::Decoding
}
}
impl From<Utf8Error> for ScrubMinidumpError {
fn from(_source: Utf8Error) -> Self {
Self::Decoding
}
}
/// A region of the minidump that is a candidate for scrubbing.
///
/// Every variant carries the byte range of the region, expressed as offsets into the raw
/// minidump data, so the region can be looked up again once the parsed view is gone.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum MinidumpItem {
    /// A memory region whose location matches the stack of one of the threads.
    StackMemory(Range<usize>),
    /// A memory region from the memory list that is not the stack of any thread.
    NonStackMemory(Range<usize>),
    /// The raw contents of the `LinuxEnviron` stream.
    LinuxEnviron(Range<usize>),
    /// The raw contents of the `LinuxCmdLine` stream.
    LinuxCmdLine(Range<usize>),
    /// The name of a code module; scrubbed as a UTF-16LE encoded file path.
    CodeModuleName(Range<usize>),
    /// The debug file name from a module's CodeView record; scrubbed as a UTF-8 encoded
    /// file path.
    DebugModuleName(Range<usize>),
}
/// A parsed minidump together with the raw bytes it was parsed from.
///
/// Keeping both allows translating what the parser returns back into byte offsets.
struct MinidumpData<'a> {
    /// The raw bytes of the minidump.
    data: &'a [u8],
    /// The parsed view, reading from the same bytes as `data`.
    minidump: Minidump<'a, &'a [u8]>,
}
impl<'a> MinidumpData<'a> {
fn parse(data: &'a [u8]) -> Result<Self, ScrubMinidumpError> {
let minidump = Minidump::read(data).map_err(ScrubMinidumpError::InvalidMinidump)?;
Ok(Self { data, minidump })
}
fn offset(&self, slice: &[u8]) -> Option<usize> {
let base = self.data.as_ptr() as usize;
let pointer = slice.as_ptr() as usize;
if pointer > base {
Some(pointer - base)
} else {
None
}
}
fn slice_range(&self, slice: &[u8]) -> Option<Range<usize>> {
let start = self.offset(slice)?;
let end = start + slice.len();
Some(start..end)
}
fn location_range(
&self,
location: MINIDUMP_LOCATION_DESCRIPTOR,
) -> Result<Range<usize>, ScrubMinidumpError> {
let start: usize = location.rva.try_into()?;
let len: usize = location.data_size.try_into()?;
Ok(start..start + len)
}
fn raw_stream_range(
&self,
stream_type: StreamType,
) -> Result<Option<Range<usize>>, ScrubMinidumpError> {
let range = match self.minidump.get_raw_stream(stream_type.into()) {
Ok(stream) => Some(
self.slice_range(stream)
.ok_or(ScrubMinidumpError::InvalidAddress)?,
),
Err(MinidumpError::StreamNotFound) => None,
Err(e) => return Err(ScrubMinidumpError::InvalidMinidump(e)),
};
Ok(range)
}
fn items(&self) -> Result<Vec<MinidumpItem>, ScrubMinidumpError> {
let mut items = Vec::new();
let thread_list: MinidumpThreadList = self.minidump.get_stream()?;
let mem_list: MinidumpMemoryList = self.minidump.get_stream()?;
for mem in mem_list.iter() {
if thread_list
.threads
.iter()
.any(|t| t.raw.stack.memory.rva == mem.desc.memory.rva)
{
items.push(MinidumpItem::StackMemory(
self.location_range(mem.desc.memory)?,
));
} else {
items.push(MinidumpItem::NonStackMemory(
self.location_range(mem.desc.memory)?,
));
}
}
if let Some(range) = self.raw_stream_range(StreamType::LinuxEnviron)? {
items.push(MinidumpItem::LinuxEnviron(range));
}
if let Some(range) = self.raw_stream_range(StreamType::LinuxCmdLine)? {
items.push(MinidumpItem::LinuxCmdLine(range));
}
let mod_list: MinidumpModuleList = self.minidump.get_stream()?;
let mut rvas = Vec::new();
for module in mod_list.iter() {
let rva: usize = module.raw.module_name_rva.try_into()?;
if rvas.contains(&rva) {
continue;
} else {
rvas.push(rva);
}
let len_bytes = self
.data
.get(rva..)
.ok_or(ScrubMinidumpError::InvalidAddress)?;
let len: usize = u32_from_bytes(len_bytes, self.minidump.endian)?.try_into()?;
let start: usize = rva + 4;
items.push(MinidumpItem::CodeModuleName(start..start + len));
let codeview_loc = module.raw.cv_record;
let cv_start: usize = codeview_loc.rva.try_into()?;
let cv_len: usize = codeview_loc.data_size.try_into()?;
let signature_bytes = self
.data
.get(cv_start..)
.ok_or(ScrubMinidumpError::InvalidAddress)?;
let signature = u32_from_bytes(signature_bytes, self.minidump.endian)?;
match CvSignature::from_u32(signature) {
Some(CvSignature::Pdb70) => {
let offset: usize = 4 + (4 + 2 + 2 + 8) + 4; items.push(MinidumpItem::DebugModuleName(
(cv_start + offset)..(cv_start + cv_len),
));
}
Some(CvSignature::Pdb20) => {
let offset: usize = 4 + 4 + 4 + 4; items.push(MinidumpItem::DebugModuleName(
(cv_start + offset)..(cv_start + cv_len),
));
}
_ => {}
}
}
Ok(items)
}
}
/// Reads a `u32` from the first four bytes of `bytes` using the given byte order.
///
/// Bytes after the first four are ignored.
///
/// # Errors
///
/// Returns [`ScrubMinidumpError::InvalidAddress`] if `bytes` holds fewer than four bytes.
fn u32_from_bytes(bytes: &[u8], endian: Endian) -> Result<u32, ScrubMinidumpError> {
    let word = bytes
        .get(..4)
        .and_then(|head| <[u8; 4]>::try_from(head).ok())
        .ok_or(ScrubMinidumpError::InvalidAddress)?;

    Ok(match endian {
        Endian::Little => u32::from_le_bytes(word),
        Endian::Big => u32::from_be_bytes(word),
    })
}
impl PiiAttachmentsProcessor<'_> {
    /// Scrubs a minidump in place.
    ///
    /// The minidump is parsed once to locate the regions of interest: memory regions, the
    /// Linux environment and command line streams, and the code and debug file names of
    /// the modules. Each region is then scrubbed under its own processing state below the
    /// state created for `filename`.
    ///
    /// Returns `Ok(true)` if at least one of the scrubbing calls reported a change.
    ///
    /// # Errors
    ///
    /// Returns an error if the minidump cannot be parsed, if a region does not lie within
    /// `data`, or if a module name is not validly encoded. Regions processed before the
    /// failing one have already been modified at that point.
    pub fn scrub_minidump(
        &self,
        filename: &str,
        data: &mut [u8],
    ) -> Result<bool, ScrubMinidumpError> {
        // Mutably borrows the bytes of one item, rejecting ranges outside of the data.
        fn region(data: &mut [u8], range: Range<usize>) -> Result<&mut [u8], ScrubMinidumpError> {
            data.get_mut(range)
                .ok_or(ScrubMinidumpError::InvalidAddress)
        }

        let file_state = self.state(filename, ValueType::Minidump);
        // The parsed view borrows `data`; only the plain ranges are kept so that the data
        // can be mutated below.
        let items = MinidumpData::parse(data)?.items()?;

        let mut changed = false;
        for item in items {
            let item_changed = match item {
                MinidumpItem::StackMemory(range) => {
                    let bytes = region(data, range)?;
                    let state = file_state.enter_static(
                        "stack_memory",
                        Some(Cow::Owned(FieldAttrs::new().pii(Pii::Maybe))),
                        ValueType::Binary | ValueType::StackMemory,
                    );
                    self.scrub_bytes(bytes, &state, ScrubEncodings::All)
                }
                MinidumpItem::NonStackMemory(range) => {
                    let bytes = region(data, range)?;
                    let state = file_state.enter_static(
                        "heap_memory",
                        Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))),
                        ValueType::Binary | ValueType::HeapMemory,
                    );
                    self.scrub_bytes(bytes, &state, ScrubEncodings::All)
                }
                MinidumpItem::LinuxEnviron(range) | MinidumpItem::LinuxCmdLine(range) => {
                    let bytes = region(data, range)?;
                    // Both streams are entered with an empty key and as plain binary.
                    let state = file_state.enter_static(
                        "",
                        Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))),
                        Some(ValueType::Binary),
                    );
                    self.scrub_bytes(bytes, &state, ScrubEncodings::All)
                }
                MinidumpItem::CodeModuleName(range) => {
                    let bytes = region(data, range)?;
                    let state = file_state.enter_static(
                        "code_file",
                        Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))),
                        Some(ValueType::String),
                    );
                    // A name that is not valid UTF-16LE aborts the whole scrubbing run.
                    let path = WStr::from_utf16le_mut(bytes)?;
                    self.scrub_utf16_filepath(path, &state)
                }
                MinidumpItem::DebugModuleName(range) => {
                    let bytes = region(data, range)?;
                    let state = file_state.enter_static(
                        "debug_file",
                        Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))),
                        Some(ValueType::String),
                    );
                    // A name that is not valid UTF-8 aborts the whole scrubbing run.
                    let path = std::str::from_utf8_mut(bytes)?;
                    self.scrub_utf8_filepath(path, &state)
                }
            };

            changed |= item_changed;
        }

        Ok(changed)
    }
}
#[cfg(test)]
mod tests {
use minidump::format::RVA;
use minidump::{MinidumpModule, Module};
use super::*;
use crate::config::PiiConfig;
/// Test fixture holding a parsed original minidump and its scrubbed counterpart.
///
/// `scrubbed_dump` reads from the buffer owned by `_scrubbed_data` through a slice whose
/// lifetime was extended to `'static`. Struct fields are dropped in declaration order, so
/// `scrubbed_dump` is declared first to ensure that the parsed dump is gone before the
/// buffer it points into is freed.
struct TestScrubber {
    /// The unmodified minidump.
    orig_dump: Minidump<'static, &'static [u8]>,
    /// The minidump after scrubbing; borrows from `_scrubbed_data`.
    scrubbed_dump: Minidump<'static, &'static [u8]>,
    /// Owns the scrubbed bytes; never accessed directly.
    _scrubbed_data: Vec<u8>,
}
impl TestScrubber {
    /// Scrubs a copy of `orig_data` with the PII config given as `json` and parses both
    /// the original and the scrubbed bytes.
    ///
    /// `filename` is the attachment name passed on to the scrubber.
    ///
    /// # Panics
    ///
    /// Panics if either minidump fails to parse, if `json` is not a valid PII config, or
    /// if scrubbing returns an error.
    fn new(filename: &str, orig_data: &'static [u8], json: serde_json::Value) -> Self {
        let orig_dump = Minidump::read(orig_data).expect("original minidump failed to parse");

        let config: PiiConfig = serde_json::from_value(json).expect("invalid config json");
        let processor = PiiAttachmentsProcessor::new(config.compiled());

        let mut scrubbed_data = orig_data.to_vec();
        processor
            .scrub_minidump(filename, &mut scrubbed_data)
            .expect("scrubbing failed");

        // SAFETY: the slice points into the heap buffer of `scrubbed_data`. Moving the
        // vector into the returned struct does not move that buffer, and the vector is
        // not modified afterwards, so the slice stays valid for as long as the struct
        // owns the vector. The `'static` lifetime is not real and must not escape the
        // struct.
        let slice =
            unsafe { std::mem::transmute::<&[u8], &'static [u8]>(scrubbed_data.as_slice()) };
        let scrubbed_dump = Minidump::read(slice).expect("scrubbed minidump failed to parse");

        Self {
            orig_dump,
            _scrubbed_data: scrubbed_data,
            scrubbed_dump,
        }
    }
}
/// Selects one of the two minidumps held by a `TestScrubber`.
enum Which {
    /// The minidump as it was passed in.
    Original,
    /// The minidump after scrubbing.
    Scrubbed,
}
/// Selects which kind of memory region to return from a minidump.
enum MemRegion {
    /// Memory regions that start at the stack location of a thread.
    Stack,
    /// All other memory regions.
    Heap,
}
impl TestScrubber {
    /// Returns the parsed minidump selected by `which`.
    fn select(&self, which: Which) -> &Minidump<'static, &'static [u8]> {
        match which {
            Which::Original => &self.orig_dump,
            Which::Scrubbed => &self.scrubbed_dump,
        }
    }

    /// Returns a copy of the main module of the selected minidump.
    ///
    /// Panics if the module list cannot be read or has no main module.
    fn main_module(&self, which: Which) -> MinidumpModule {
        let modules: MinidumpModuleList = self.select(which).get_stream().unwrap();
        modules.main_module().cloned().unwrap()
    }

    /// Returns copies of all modules of the selected minidump except the first one in
    /// iteration order.
    ///
    /// NOTE(review): this presumably relies on the main module being iterated first -
    /// confirm against the module list implementation.
    fn other_modules(&self, which: Which) -> Vec<MinidumpModule> {
        let modules: MinidumpModuleList = self.select(which).get_stream().unwrap();
        modules.iter().skip(1).cloned().collect()
    }

    /// Returns the bytes of all memory regions of the requested kind.
    ///
    /// A region counts as stack memory if it starts at the stack location of any thread;
    /// every other region counts as heap memory.
    fn memory_regions<'slf>(&'slf self, which: Which, region: MemRegion) -> Vec<&'slf [u8]> {
        let dump: &'slf Minidump<&'static [u8]> = self.select(which);

        let thread_list: MinidumpThreadList = dump.get_stream().unwrap();
        let stack_rvas: Vec<RVA> = thread_list
            .threads
            .iter()
            .map(|thread| thread.raw.stack.memory.rva)
            .collect();

        let want_stack = matches!(region, MemRegion::Stack);
        let mem_list: MinidumpMemoryList<'slf> = dump.get_stream().unwrap();
        mem_list
            .iter()
            .filter(|mem| stack_rvas.contains(&mem.desc.memory.rva) == want_stack)
            // SAFETY: only the lifetime of the slice is changed. The bytes belong to the
            // data backing `dump`, which is borrowed from `self` for `'slf`, so they
            // outlive the temporary memory list.
            .map(|mem| unsafe { std::mem::transmute(mem.bytes) })
            .collect()
    }

    /// Returns the bytes of all stack memory regions.
    fn stacks(&self, which: Which) -> Vec<&[u8]> {
        self.memory_regions(which, MemRegion::Stack)
    }

    /// Returns the bytes of all memory regions that are not stack memory.
    fn heaps(&self, which: Which) -> Vec<&[u8]> {
        self.memory_regions(which, MemRegion::Heap)
    }

    /// Returns the raw bytes of the `LinuxEnviron` stream.
    ///
    /// Panics if the selected minidump has no such stream.
    fn environ(&self, which: Which) -> &[u8] {
        self.select(which)
            .get_raw_stream(StreamType::LinuxEnviron.into())
            .unwrap()
    }
}
/// Masks code and debug file names in the Windows fixture and checks that the directory
/// part of the paths is replaced while the file names stay readable.
#[test]
fn test_module_list_removed_win() {
    let config = serde_json::json!({
        "applications": {
            "debug_file": ["@anything:mask"],
            "$attachments.'windows.dmp'.code_file": ["@anything:mask"]
        }
    });
    let scrubber = TestScrubber::new(
        "windows.dmp",
        include_bytes!("../../tests/fixtures/windows.dmp"),
        config,
    );

    // The original main module carries full paths.
    let original_main = scrubber.main_module(Which::Original);
    assert_eq!(
        original_main.code_file(),
        "C:\\projects\\breakpad-tools\\windows\\Release\\crash.exe"
    );
    assert_eq!(
        original_main.debug_file().unwrap(),
        "C:\\projects\\breakpad-tools\\windows\\Release\\crash.pdb"
    );

    // After scrubbing only the file names are left.
    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(
        scrubbed_main.code_file(),
        "******************************************\\crash.exe"
    );
    assert_eq!(
        scrubbed_main.debug_file().unwrap(),
        "******************************************\\crash.pdb"
    );

    for module in scrubber.other_modules(Which::Original) {
        let code_file = module.code_file();
        assert!(
            code_file.starts_with("C:\\Windows\\System32\\"),
            "code file without full path"
        );
        let debug_file = module.debug_file().unwrap();
        assert!(debug_file.ends_with(".pdb"));
    }

    for module in scrubber.other_modules(Which::Scrubbed) {
        let code_file = module.code_file();
        assert!(
            code_file.starts_with("*******************\\"),
            "code file path not scrubbed"
        );
        let debug_file = module.debug_file().unwrap();
        assert!(debug_file.ends_with(".pdb"));
    }
}
/// Masks code and debug file names in the Linux fixture and checks that the directory
/// part of the paths is replaced while the file names stay readable.
#[test]
fn test_module_list_removed_lin() {
    let config = serde_json::json!({
        "applications": {
            "debug_file": ["@anything:mask"],
            "$attachments.*.code_file": ["@anything:mask"]
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let original_main = scrubber.main_module(Which::Original);
    assert_eq!(original_main.code_file(), "/work/linux/build/crash");
    assert_eq!(original_main.debug_file().unwrap(), "/work/linux/build/crash");

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(scrubbed_main.code_file(), "*****************/crash");
    assert_eq!(scrubbed_main.debug_file().unwrap(), "*****************/crash");

    // `linux-gate.so` is the one module name that is accepted without any path.
    for module in scrubber.other_modules(Which::Original) {
        let code_file = module.code_file();
        let code_has_path = code_file.matches('/').count() > 1;
        assert!(
            code_has_path || code_file == "linux-gate.so",
            "code file does not contain path"
        );

        let debug_file = module.debug_file().unwrap();
        let debug_has_path = debug_file.matches('/').count() > 1;
        assert!(
            debug_has_path || debug_file == "linux-gate.so",
            "debug file does not contain a path"
        );
    }

    // A scrubbed path keeps exactly one separator, the one in front of the file name.
    for module in scrubber.other_modules(Which::Scrubbed) {
        let code_file = module.code_file();
        let code_is_scrubbed = code_file.matches('/').count() == 1;
        assert!(
            code_is_scrubbed || code_file == "linux-gate.so",
            "code file not scrubbed"
        );

        let debug_file = module.debug_file().unwrap();
        let debug_is_scrubbed = debug_file.matches('/').count() == 1;
        assert!(
            debug_is_scrubbed || debug_file == "linux-gate.so",
            "scrubbed debug file contains a path"
        );
    }
}
/// Masks code and debug file names in the macOS fixture. The debug file names in this
/// fixture contain no path and are expected to stay as they are.
#[test]
fn test_module_list_removed_mac() {
    let config = serde_json::json!({
        "applications": {
            "debug_file": ["@anything:mask"],
            "$attachments.*.code_file": ["@anything:mask"]
        }
    });
    let scrubber = TestScrubber::new(
        "macos.dmp",
        include_bytes!("../../tests/fixtures/macos.dmp"),
        config,
    );

    let original_main = scrubber.main_module(Which::Original);
    assert_eq!(
        original_main.code_file(),
        "/Users/travis/build/getsentry/breakpad-tools/macos/build/./crash"
    );
    assert_eq!(original_main.debug_file().unwrap(), "crash");

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(
        scrubbed_main.code_file(),
        "**********************************************************/crash"
    );
    assert_eq!(scrubbed_main.debug_file().unwrap(), "crash");

    for module in scrubber.other_modules(Which::Original) {
        let code_separators = module.code_file().matches('/').count();
        assert!(code_separators > 1, "code file does not contain path");

        let debug_separators = module.debug_file().unwrap().matches('/').count();
        assert!(debug_separators == 0, "debug file contains a path");
    }

    for module in scrubber.other_modules(Which::Scrubbed) {
        let code_separators = module.code_file().matches('/').count();
        assert!(code_separators == 1, "code file not scrubbed");

        let debug_separators = module.debug_file().unwrap().matches('/').count();
        assert!(
            debug_separators == 0,
            "scrubbed debug file contains a path"
        );
    }
}
/// Checks that a bare `$string` selector masks the code and debug file name of the main
/// module.
#[test]
fn test_module_list_selectors() {
    let config = serde_json::json!({
        "applications": {
            "$string": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed_main = scrubber.main_module(Which::Scrubbed);
    assert_eq!(scrubbed_main.code_file(), "*****************/crash");
    assert_eq!(scrubbed_main.debug_file().unwrap(), "*****************/crash");
}
/// Checks that the `$stack_memory` selector masks every byte of every stack region.
#[test]
fn test_stack_scrubbing_backwards_compatible_selector() {
    let config = serde_json::json!({
        "applications": {
            "$stack_memory": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let stacks = scrubber.stacks(Which::Scrubbed);
    for bytes in stacks {
        assert!(bytes.iter().all(|&byte| byte == b'*'));
    }
}
/// Checks that the `$minidump.stack_memory` selector masks every byte of every stack
/// region.
#[test]
fn test_stack_scrubbing_path_item_selector() {
    let config = serde_json::json!({
        "applications": {
            "$minidump.stack_memory": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let stacks = scrubber.stacks(Which::Scrubbed);
    for bytes in stacks {
        assert!(bytes.iter().all(|&byte| byte == b'*'));
    }
}
/// Applies the `$minidump.$binary` selector and asserts that every stack byte is masked.
///
/// The test is marked `#[should_panic]`, so it passes only if the body panics, for
/// example because a stack region is not fully masked.
///
/// NOTE(review): presumably this records a known limitation of the selector - confirm.
/// A panic during fixture setup would satisfy `#[should_panic]` as well.
#[test]
#[should_panic]
fn test_stack_scrubbing_valuetype_selector() {
    let config = serde_json::json!({
        "applications": {
            "$minidump.$binary": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let stacks = scrubber.stacks(Which::Scrubbed);
    for bytes in stacks {
        assert!(bytes.iter().all(|&byte| byte == b'*'));
    }
}
/// Checks that a bare `$binary` selector leaves the stack regions untouched.
#[test]
fn test_stack_scrubbing_valuetype_not_fully_qualified() {
    let config = serde_json::json!({
        "applications": {
            "$binary": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed = scrubber.stacks(Which::Scrubbed);
    let original = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed.iter().zip(original.iter()) {
        assert_eq!(after, before);
    }
}
/// Applies the `$minidump.*` selector and asserts that the stack regions are unchanged.
///
/// The test is marked `#[should_panic]`, so it passes only if the body panics, for
/// example because a stack region differs from the original.
///
/// NOTE(review): presumably this records that the wildcard does match stack memory -
/// confirm. A panic during fixture setup would satisfy `#[should_panic]` as well.
#[test]
#[should_panic]
fn test_stack_scrubbing_wildcard() {
    let config = serde_json::json!({
        "applications": {
            "$minidump.*": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed = scrubber.stacks(Which::Scrubbed);
    let original = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed.iter().zip(original.iter()) {
        assert_eq!(after, before);
    }
}
/// Checks that the `$attachments.**` selector leaves the stack regions untouched.
#[test]
fn test_stack_scrubbing_deep_wildcard() {
    let config = serde_json::json!({
        "applications": {
            "$attachments.**": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed = scrubber.stacks(Which::Scrubbed);
    let original = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed.iter().zip(original.iter()) {
        assert_eq!(after, before);
    }
}
/// Checks that `$binary && !stack_memory` leaves the stack regions untouched and masks
/// every byte of the remaining memory regions.
#[test]
fn test_stack_scrubbing_binary_not_stack() {
    let config = serde_json::json!({
        "applications": {
            "$binary && !stack_memory": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let scrubbed = scrubber.stacks(Which::Scrubbed);
    let original = scrubber.stacks(Which::Original);
    for (after, before) in scrubbed.iter().zip(original.iter()) {
        assert_eq!(after, before);
    }

    let heaps = scrubber.heaps(Which::Scrubbed);
    for bytes in heaps {
        assert!(bytes.iter().all(|&byte| byte == b'*'));
    }
}
/// Checks that a bare `$binary` selector masks every byte of the `LinuxEnviron` stream.
#[test]
fn test_linux_environ_valuetype() {
    let config = serde_json::json!({
        "applications": {
            "$binary": ["@anything:mask"],
        }
    });
    let scrubber = TestScrubber::new(
        "linux.dmp",
        include_bytes!("../../tests/fixtures/linux.dmp"),
        config,
    );

    let environ = scrubber.environ(Which::Scrubbed);
    let fully_masked = environ.iter().all(|&byte| byte == b'*');
    assert!(fully_masked);
}
}