mod redis;
mod resource;
mod sql;
use once_cell::sync::Lazy;
use psl;
use relay_filter::matches_any_origin;
use serde_json::Value;
#[cfg(test)]
pub use sql::{scrub_queries, Mode};
use relay_event_schema::protocol::Span;
use std::borrow::Cow;
use std::net::{Ipv4Addr, Ipv6Addr};
use std::path::Path;
use url::{Host, Url};
use crate::regexes::{
DB_SQL_TRANSACTION_CORE_DATA_REGEX, DB_SUPABASE_REGEX, FUNCTION_NORMALIZER_REGEX,
RESOURCE_NORMALIZER_REGEX,
};
use crate::span::description::redis::matching_redis_command;
use crate::span::description::resource::COMMON_PATH_SEGMENTS;
use crate::span::tag_extraction::HTTP_METHOD_EXTRACTOR_REGEX;
use crate::span::TABLE_NAME_REGEX;
/// Placeholder base URL used by `scrub_resource` to parse descriptions that are relative URLs.
///
/// The `http://replace_me` prefix is stripped from the formatted result again afterwards.
static DUMMY_BASE_URL: Lazy<Url> = Lazy::new(|| "http://replace_me".parse().unwrap());
/// Resource path segments longer than this many bytes are replaced by `*`
/// (see `scrub_resource_segment`).
const MAX_SEGMENT_LENGTH: usize = 25;
/// File extensions longer than this many bytes are replaced by `*`
/// (see `scrub_resource_file_extension`).
const MAX_EXTENSION_LENGTH: usize = 10;
/// Domain names that `scrub_domain_name` returns unchanged.
const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"];
/// Produces a scrubbed version of the span's description, chosen by the span's `op`.
///
/// The `op` is split at the first `.` into a category and a sub-operation, and the matching
/// scrubber is applied to the description. `span_allowed_hosts` is forwarded to the HTTP scrubber
/// so that hosts on that list are kept verbatim.
///
/// Returns a tuple of:
///  - the scrubbed description, or `None` if the span has no string description, no string `op`,
///    an `op` that is not handled here, or the selected scrubber produced nothing;
///  - the parsed SQL statements, set only when `sql::scrub_queries` reports `Mode::Parsed`.
pub(crate) fn scrub_span_description(
span: &Span,
span_allowed_hosts: &[String],
) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
let Some(description) = span.description.as_str() else {
return (None, None);
};
let data = span.data.value();
let db_system = data
.and_then(|data| data.db_system.value())
.and_then(|system| system.as_str());
let span_origin = span.origin.as_str();
// Filled in by the generic SQL branch below when the query could be parsed.
let mut parsed_sql = None;
let scrubbed_description = span
.op
.as_str()
// An op without a `.` is treated as a category with an empty sub-operation.
.map(|op| op.split_once('.').unwrap_or((op, "")))
.and_then(|(op, sub)| match (op, sub) {
("http", _) => scrub_http(description, span_allowed_hosts),
("cache", _) | ("db", "redis") => scrub_redis_keys(description),
// `db.system` takes precedence over the sub-operation for redis and mongodb.
("db", _) if db_system == Some("redis") => scrub_redis_keys(description),
("db", _) if db_system == Some("mongodb") => {
let command = data
.and_then(|data| data.db_operation.value())
.and_then(|command| command.as_str());
let collection = data
.and_then(|data| data.db_collection_name.value())
.and_then(|collection| collection.as_str());
// Both the operation and the collection name are required for MongoDB scrubbing.
if let (Some(command), Some(collection)) = (command, collection) {
scrub_mongodb_query(description, command, collection)
} else {
None
}
}
("db", sub) => {
// Non-SQL or unsupported flavors are not scrubbed at all.
if sub.contains("clickhouse")
|| sub.contains("mongodb")
|| sub.contains("redis")
|| is_legacy_activerecord(sub, db_system)
|| is_sql_mongodb(description, db_system)
{
None
} else if span_origin == Some("auto.db.core_data") {
scrub_core_data(description)
} else if sub.contains("prisma") {
// Passed through unchanged.
// NOTE(review): presumably Prisma descriptions are already parameterized — confirm.
Some(description.to_owned())
} else if span_origin == Some("auto.db.supabase")
&& description.starts_with("from(")
{
scrub_supabase(description)
} else {
// Generic SQL path; keep the AST around if the parser succeeded.
let (scrubbed, mode) = sql::scrub_queries(db_system, description);
if let sql::Mode::Parsed(ast) = mode {
parsed_sql = Some(ast);
}
scrubbed
}
}
("resource", ty) => scrub_resource(ty, description),
// Only `ai.run*` and `ai.pipeline*` descriptions are kept; other `ai` spans yield `None`.
("ai", sub) => match sub.split_once('.').unwrap_or((sub, "")) {
("run" | "pipeline", _) => {
Some(description.to_owned())
}
_ => None,
},
// The following arms return the description as-is, without any scrubbing.
("ui", "load") => {
Some(description.to_owned())
}
// For UI interaction/React spans the component name from span data replaces the description.
("ui", sub) if sub.starts_with("interaction.") || sub.starts_with("react.") => data
.and_then(|data| data.ui_component_name.value())
.and_then(|value| value.as_str())
.map(String::from),
("app", _) => {
Some(description.to_owned())
}
("contentprovider", "load") => {
Some(description.to_owned())
}
("application", "load") => {
Some(description.to_owned())
}
("activity", "load") => {
Some(description.to_owned())
}
("file", _) => scrub_file(description),
("function", _) => scrub_function(description),
_ => None,
});
(scrubbed_description, parsed_sql)
}
/// Heuristically decides whether a `db` span carries a MongoDB query rather than SQL.
///
/// Returns `true` if `db_system` is `"mongodb"`, if the description starts with `{`, or if it
/// contains one of the marker sequences `"$`, `({` or `[{`.
fn is_sql_mongodb(description: &str, db_system: Option<&str>) -> bool {
    const MONGODB_MARKERS: [&str; 3] = ["\"$", "({", "[{"];

    if matches!(db_system, Some("mongodb")) {
        return true;
    }

    description.starts_with('{')
        || MONGODB_MARKERS
            .iter()
            .any(|marker| description.contains(*marker))
}
/// Returns `true` for ActiveRecord spans that carry no `db.system`.
///
/// A span qualifies when `db_system` is `None` and the sub-operation contains either
/// `active_record` or `activerecord`.
fn is_legacy_activerecord(sub_op: &str, db_system: Option<&str>) -> bool {
    if db_system.is_some() {
        return false;
    }

    ["active_record", "activerecord"]
        .iter()
        .any(|needle| sub_op.contains(*needle))
}
/// Scrubs a Core Data transaction description by replacing every match of
/// `DB_SQL_TRANSACTION_CORE_DATA_REGEX` with `*`.
///
/// Returns `None` when `replace_all` hands back the input as a borrowed value, i.e. when
/// nothing was replaced.
fn scrub_core_data(string: &str) -> Option<String> {
    let replaced = DB_SQL_TRANSACTION_CORE_DATA_REGEX.replace_all(string, "*");
    if let Cow::Owned(scrubbed) = replaced {
        Some(scrubbed)
    } else {
        None
    }
}
/// Scrubs a Supabase query description by replacing every match of `DB_SUPABASE_REGEX`
/// with the placeholder `{%s}`.
///
/// Always returns `Some`, even if nothing was replaced.
fn scrub_supabase(string: &str) -> Option<String> {
    let scrubbed = DB_SUPABASE_REGEX.replace_all(string, "{%s}");
    Some(scrubbed.into_owned())
}
/// Scrubs the description of an HTTP span of the form `<METHOD> <URL>`.
///
/// Returns `None` if the description contains no space or the first word is not accepted by
/// `HTTP_METHOD_EXTRACTOR_REGEX`. Otherwise the result is:
///  - `<METHOD> data:image/*` for `data:image/` URLs,
///  - `<METHOD> <scheme>://<scrubbed host>[:port]` for URLs that parse,
///  - `<METHOD> *` for anything else.
///
/// Hosts matching `allow_list` are kept verbatim (see `scrub_host`).
fn scrub_http(string: &str, allow_list: &[String]) -> Option<String> {
    let (method, url) = string.split_once(' ')?;
    if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) {
        return None;
    }

    if url.starts_with("data:image/") {
        return Some(format!("{method} data:image/*"));
    }

    // Anything that is not a full URL (e.g. a bare path) is collapsed into a wildcard.
    let Ok(url) = Url::parse(url) else {
        return Some(format!("{method} *"));
    };

    let scheme = url.scheme();
    let scrubbed_host = url.host().map(|host| scrub_host(host, allow_list));
    let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
    Some(format!("{method} {scheme}://{domain}"))
}
/// Scrubs the description of a file span, keeping only the file extension.
///
/// Only the part of the description before the first space is treated as the file name.
/// The result is `*.<ext>` when a scrubbed extension survives, and `*` when the file has no
/// extension or the extension itself was scrubbed to `*`.
fn scrub_file(description: &str) -> Option<String> {
    let filename = description
        .split_once(' ')
        .map_or(description, |(name, _rest)| name);

    let Some(extension) = Path::new(filename).extension() else {
        return Some("*".to_owned());
    };

    let scrubbed = match scrub_resource_file_extension(extension.to_str()?) {
        "*" => "*".to_string(),
        ext => format!("*.{ext}"),
    };
    Some(scrubbed)
}
/// Scrubs a URL host unless it matches one of the origins in `allow_list`.
///
/// Allowed hosts are returned in their string form. Otherwise IP addresses are scrubbed with
/// `scrub_ipv4` / `scrub_ipv6` and domain names with `scrub_domain_name`.
pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> {
    let allowed_origins: Vec<_> = allow_list
        .iter()
        .map(|origin| origin.as_str().into())
        .collect();

    // Render the host once; it serves both as the match input and as the allowed result.
    let rendered = host.to_string();
    if matches_any_origin(Some(rendered.as_str()), &allowed_origins) {
        return Cow::Owned(rendered);
    }

    match host {
        Host::Domain(domain) => scrub_domain_name(domain),
        Host::Ipv4(ip) => Cow::Borrowed(scrub_ipv4(ip)),
        Host::Ipv6(ip) => Cow::Borrowed(scrub_ipv6(ip)),
    }
}
/// Scrubs an IPv4 address.
///
/// The loopback address `127.0.0.1` is kept; every other address becomes `*.*.*.*`.
pub fn scrub_ipv4(ip: Ipv4Addr) -> &'static str {
    if ip == Ipv4Addr::LOCALHOST {
        "127.0.0.1"
    } else {
        "*.*.*.*"
    }
}
/// Scrubs an IPv6 address.
///
/// The loopback address `::1` is kept; every other address becomes `*:*:*:*:*:*:*:*`.
pub fn scrub_ipv6(ip: Ipv6Addr) -> &'static str {
    if ip == Ipv6Addr::LOCALHOST {
        "::1"
    } else {
        "*:*:*:*:*:*:*:*"
    }
}
/// Replaces all subdomains of a domain name with a single `*`.
///
/// The registrable domain (as determined by `psl::domain`) is kept, so
/// `www.radio1.bbc.co.uk` becomes `*.bbc.co.uk`. The input is returned unchanged when it is in
/// `DOMAIN_ALLOW_LIST`, when `psl` cannot parse it, when it has no subdomain, or when the
/// scrubbed bytes are not valid UTF-8.
pub fn scrub_domain_name(domain: &str) -> Cow<'_, str> {
    if DOMAIN_ALLOW_LIST.contains(&domain) {
        return Cow::Borrowed(domain);
    }

    let Some(registrable) = psl::domain(domain.as_bytes()) else {
        return Cow::Borrowed(domain);
    };

    let suffix = registrable.suffix().as_bytes();
    // The registrable domain minus its public suffix, e.g. `bbc.` for `bbc.co.uk`.
    let Some(second_level_domain) = registrable.as_bytes().strip_suffix(suffix) else {
        return Cow::Borrowed(domain);
    };

    // Whatever precedes the registrable domain in the input is the subdomain part.
    let has_subdomain = domain
        .as_bytes()
        .strip_suffix(suffix)
        .and_then(|rest| rest.strip_suffix(second_level_domain))
        .is_some_and(|subdomain| !subdomain.is_empty());
    if !has_subdomain {
        return Cow::Borrowed(domain);
    }

    let scrubbed = [b"*.", second_level_domain, suffix].concat();
    String::from_utf8(scrubbed).map_or(Cow::Borrowed(domain), Cow::Owned)
}
/// Joins an optional host and an optional port into `host` or `host:port`.
///
/// Returns an empty string when there is no host, regardless of the port. An allocation only
/// happens when both host and port are present.
pub fn concatenate_host_and_port(host: Option<&str>, port: Option<u16>) -> Cow<str> {
    let Some(host) = host else {
        return Cow::Borrowed("");
    };

    match port {
        Some(port) => Cow::Owned(format!("{host}:{port}")),
        None => Cow::Borrowed(host),
    }
}
/// Scrubs a Redis (or cache) command, keeping only the command name.
///
/// The description is trimmed first. If `matching_redis_command` recognizes a command, the
/// result is the upper-cased command, followed by ` *` when anything comes after it.
/// Unrecognized input is replaced by `*`. Always returns `Some`.
fn scrub_redis_keys(string: &str) -> Option<String> {
    let trimmed = string.trim();

    let Some(command) = matching_redis_command(trimmed) else {
        return Some("*".to_owned());
    };

    let mut scrubbed = command.to_uppercase();
    // Everything after the command name counts as arguments and is collapsed into one `*`.
    let has_arguments = trimmed
        .get(scrubbed.len()..)
        .is_some_and(|rest| !rest.is_empty());
    if has_arguments {
        scrubbed.push_str(" *");
    }
    Some(scrubbed)
}
/// How a resource description was parsed as a URL in `scrub_resource`.
enum UrlType {
/// The description parsed as a URL on its own, without a base.
Full,
/// The description needed the dummy base URL and starts with `/`.
Absolute,
/// The description needed the dummy base URL and does not start with `/`.
Relative,
}
/// Scrubs the description of a resource span, which is expected to be a URL or a path.
///
/// Descriptions that only parse relative to a base are resolved against `DUMMY_BASE_URL`, and
/// the dummy prefix is removed from the result again. Returns `None` if the description cannot
/// be parsed as a URL at all.
///
/// Depending on the scheme, the result is:
///  - `data:<mime type>` (or `data:*/*` without a `;`) for `data` URLs,
///  - `browser-extension://*` for browser extension URLs,
///  - otherwise `<scheme>://<scrubbed host>[:port]/<path>`, where every directory segment that
///    is not in `COMMON_PATH_SEGMENTS` becomes `*` (consecutive `*` are collapsed) and the
///    last segment is scrubbed by `scrub_resource_filename`.
fn scrub_resource(resource_type: &str, string: &str) -> Option<String> {
    let (url, ty) = match Url::parse(string) {
        Ok(url) => (url, UrlType::Full),
        Err(url::ParseError::RelativeUrlWithoutBase) => {
            // Retry with a dummy base and remember what kind of path we were given.
            let url = Url::options()
                .base_url(Some(&DUMMY_BASE_URL))
                .parse(string)
                .ok()?;
            let ty = if string.starts_with('/') {
                UrlType::Absolute
            } else {
                UrlType::Relative
            };
            (url, ty)
        }
        Err(_) => return None,
    };

    let formatted = match url.scheme() {
        "data" => match url.path().split_once(';') {
            Some((ty, _data)) => format!("data:{ty}"),
            None => "data:*/*".to_owned(),
        },
        "chrome-extension" | "moz-extension" | "ms-browser-extension" => {
            return Some("browser-extension://*".to_owned());
        }
        scheme => {
            let scrubbed_host = url.host().map(|host| scrub_host(host, &[]));
            let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());

            // Split the path into its directory segments and the final (file name) segment.
            let path_segments: Vec<&str> = url.path_segments().into_iter().flatten().collect();
            let last_segment = path_segments.last().copied().unwrap_or_default();
            let parents = &path_segments[..path_segments.len().saturating_sub(1)];

            let mut output_segments: Vec<&str> = Vec::with_capacity(parents.len());
            for &segment in parents {
                if COMMON_PATH_SEGMENTS.contains(segment) {
                    output_segments.push(segment);
                } else if output_segments.last() != Some(&"*") {
                    // Collapse runs of scrubbed segments into a single wildcard.
                    output_segments.push("*");
                }
            }

            let segments = output_segments.join("/");
            let last_segment = scrub_resource_filename(resource_type, last_segment);

            if segments.is_empty() {
                format!("{scheme}://{domain}/{last_segment}")
            } else {
                format!("{scheme}://{domain}/{segments}/{last_segment}")
            }
        }
    };

    // Strip the dummy base again for descriptions that were not full URLs.
    Some(match ty {
        UrlType::Full => formatted,
        UrlType::Absolute => formatted.replace("http://replace_me", ""),
        UrlType::Relative => formatted.replace("http://replace_me/", ""),
    })
}
/// Scrubs the final path segment (file name) of a resource URL.
///
/// The segment is split at its last `.` into base name and extension; an "extension" that
/// contains a `/` is not treated as one. The extension goes through
/// `scrub_resource_file_extension`. The base name becomes `*` for resource type `img` and is
/// otherwise scrubbed by `scrub_resource_segment`. An empty path yields an empty string.
fn scrub_resource_filename<'a>(ty: &str, path: &'a str) -> Cow<'a, str> {
    if path.is_empty() {
        return Cow::Borrowed("");
    }

    let (basename, extension) = match path.rsplit_once('.') {
        Some((_, extension)) if extension.contains('/') => (path, ""),
        Some(parts) => parts,
        None => (path, ""),
    };

    let extension = scrub_resource_file_extension(extension);
    let basename = match ty {
        "img" => Cow::Borrowed("*"),
        _ => scrub_resource_segment(basename),
    };

    if extension.is_empty() {
        return basename;
    }
    Cow::Owned(format!("{basename}.{extension}"))
}
/// Scrubs a single resource path segment or file base name.
///
/// Matches of `RESOURCE_NORMALIZER_REGEX` are replaced first (`$pre*$post`). The whole segment
/// then collapses to `*` if, after that replacement, it
///  - is longer than `MAX_SEGMENT_LENGTH` bytes,
///  - contains a numeric character or one of `&%#=+@`, or
///  - consists only of ASCII letters with at least one of them upper-case.
///
/// Otherwise the normalized segment is returned.
fn scrub_resource_segment(segment: &str) -> Cow<str> {
    let normalized = RESOURCE_NORMALIZER_REGEX.replace_all(segment, "$pre*$post");

    if normalized.len() > MAX_SEGMENT_LENGTH {
        return Cow::Borrowed("*");
    }

    let has_forbidden_char = normalized
        .chars()
        .any(|c| c.is_numeric() || "&%#=+@".contains(c));
    if has_forbidden_char {
        return Cow::Borrowed("*");
    }

    let all_alphabetic = normalized.chars().all(|c| c.is_ascii_alphabetic());
    let found_uppercase = normalized.chars().any(|c| c.is_ascii_uppercase());
    if all_alphabetic && found_uppercase {
        return Cow::Borrowed("*");
    }

    normalized
}
/// Scrubs a file extension (given without the leading `.`).
///
/// The extension is cut off at the first byte that is not ASCII alphanumeric. The result is
/// `*` if the remaining part contains more than one ASCII digit or is longer than
/// `MAX_EXTENSION_LENGTH` bytes; otherwise the remaining part is returned (possibly empty).
fn scrub_resource_file_extension(mut extension: &str) -> &str {
    // Everything from the first non-alphanumeric byte onwards is not part of the extension.
    if let Some(end) = extension
        .bytes()
        .position(|byte| !byte.is_ascii_alphanumeric())
    {
        extension = &extension[..end];
    }

    let digits = extension.bytes().filter(u8::is_ascii_digit).count();
    if digits > 1 || extension.len() > MAX_EXTENSION_LENGTH {
        return "*";
    }

    extension
}
/// Scrubs a function span description by replacing every match of
/// `FUNCTION_NORMALIZER_REGEX` with `*`.
///
/// Always returns `Some`, even if nothing was replaced.
fn scrub_function(string: &str) -> Option<String> {
    let scrubbed = FUNCTION_NORMALIZER_REGEX.replace_all(string, "*");
    Some(scrubbed.into_owned())
}
/// Scrubs a MongoDB query that is given as a JSON object.
///
/// The `buffer` key is dropped, and every remaining top-level value is scrubbed via
/// `scrub_mongodb_visit_node` with a recursion limit of 3. Finally the `command` key is set to
/// the collection name, in which matches of `TABLE_NAME_REGEX` are replaced by `{%s}`.
///
/// Returns `None` if `query` is not valid JSON or its root is not an object.
fn scrub_mongodb_query(query: &str, command: &str, collection: &str) -> Option<String> {
    let mut document = serde_json::from_str::<Value>(query).ok()?;
    let root = document.as_object_mut()?;

    root.remove("buffer");
    root.values_mut()
        .for_each(|value| scrub_mongodb_visit_node(value, 3));

    // Inserted after scrubbing so the collection name itself is not turned into `?`.
    let scrubbed_collection_name = TABLE_NAME_REGEX
        .replace_all(collection, "{%s}")
        .into_owned();
    root.insert(command.to_owned(), Value::String(scrubbed_collection_name));

    Some(document.to_string())
}
/// Recursively scrubs a JSON value of a MongoDB query in place.
///
/// Objects keep their keys while their values are scrubbed with a decremented limit. Arrays
/// are replaced by `["..."]`. Every other value becomes the string `"?"`. Once
/// `recursion_limit` reaches 0, the value is replaced by `"?"` regardless of its type.
fn scrub_mongodb_visit_node(value: &mut Value, recursion_limit: usize) {
    let Some(remaining_depth) = recursion_limit.checked_sub(1) else {
        // Depth budget exhausted: do not descend any further.
        *value = Value::String("?".to_owned());
        return;
    };

    match value {
        Value::Object(map) => map
            .values_mut()
            .for_each(|child| scrub_mongodb_visit_node(child, remaining_depth)),
        Value::Array(items) => *items = vec![Value::String("...".to_owned())],
        scalar => *scalar = Value::String("?".to_owned()),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use relay_protocol::Annotated;
use similar_asserts::assert_eq;
// Generates a `#[test]` named `$name` that builds a span with op `$op_in` and description
// `$description_in`, runs `scrub_span_description` without allowed hosts, and compares the
// scrubbed description against `$expected`.
//
// An empty `$expected` means the scrubbed description must be `None`.
macro_rules! span_description_test {
($name:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
#[test]
fn $name() {
let json = format!(
r#"
{{
"description": "",
"span_id": "bd2eb23da2beb459",
"start_timestamp": 1597976393.4619668,
"timestamp": 1597976393.4718769,
"trace_id": "ff62a8b040f340bda5d830223def1d81",
"op": "{}"
}}
"#,
$op_in
);
let mut span = Annotated::<Span>::from_json(&json).unwrap();
// The description is set after parsing instead of being embedded in the JSON template.
span.value_mut()
.as_mut()
.unwrap()
.description
.set_value(Some($description_in.into()));
let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
if $expected == "" {
assert!(scrubbed.0.is_none());
} else {
assert_eq!($expected, scrubbed.0.unwrap());
}
}
};
}
// Generates two tests via `span_description_test!`: `$name` with the description as given and
// `$name2` with the lower-cased description. Both must produce the same `$expected` output.
macro_rules! span_description_test_with_lowercase {
($name:ident, $name2:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
span_description_test!($name, $description_in, $op_in, $expected);
span_description_test!($name2, ($description_in).to_lowercase(), $op_in, $expected);
};
}
// `http.client` spans: only the method, the scheme and the scrubbed host survive. Paths are
// dropped, and descriptions whose target is not a full URL collapse to `<METHOD> *`.
span_description_test!(empty, "", "http.client", "");
span_description_test!(
only_domain,
"GET http://service.io",
"http.client",
"GET http://service.io"
);
span_description_test!(
only_urllike_on_http_ops,
"GET https://www.service.io/resources/01234",
"http.client",
"GET https://*.service.io"
);
span_description_test!(
path_ids_end,
"GET https://www.service.io/resources/01234",
"http.client",
"GET https://*.service.io"
);
span_description_test!(
path_ids_middle,
"GET https://www.service.io/resources/01234/details",
"http.client",
"GET https://*.service.io"
);
span_description_test!(
path_multiple_ids,
"GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
"http.client",
"GET https://*.service.io"
);
// `localhost` and the loopback address are kept; other IP addresses are scrubbed.
span_description_test!(
localhost,
"GET https://localhost/data",
"http.client",
"GET https://localhost"
);
span_description_test!(
loopback,
"GET https://127.0.0.1/data",
"http.client",
"GET https://127.0.0.1"
);
span_description_test!(
ip_address,
"GET https://8.8.8.8/data",
"http.client",
"GET https://*.*.*.*"
);
// Bare paths are not full URLs and are replaced entirely.
span_description_test!(
path_md5_hashes,
"GET /clients/563712f9722fb0996ac8f3905b40786f/project/01234",
"http.client",
"GET *"
);
span_description_test!(
path_sha_hashes,
"GET /clients/403926033d001b5279df37cbbe5287b7c7c267fa/project/01234",
"http.client",
"GET *"
);
span_description_test!(
hex,
"GET /shop/de/f43/beef/3D6/my-beef",
"http.client",
"GET *"
);
span_description_test!(
path_uuids,
"GET /clients/8ff81d74-606d-4c75-ac5e-cee65cbbc866/project/01234",
"http.client",
"GET *"
);
span_description_test!(
data_images,
"GET data:image/png;base64,drtfghaksjfdhaeh/blah/blah/blah",
"http.client",
"GET data:image/*"
);
// Subdomain scrubbing keeps the registrable domain, including multi-label public suffixes.
span_description_test!(
simple_cctld,
"GET http://bbc.co.uk",
"http.client",
"GET http://bbc.co.uk"
);
span_description_test!(
longer_cctld,
"GET http://www.radio1.bbc.co.uk",
"http.client",
"GET http://*.bbc.co.uk"
);
span_description_test!(
complicated_tld,
"GET https://application.www.xn--85x722f.xn--55qx5d.cn",
"http.client",
"GET https://*.xn--85x722f.xn--55qx5d.cn"
);
// A SQL-looking description on an HTTP span yields no scrubbed description.
span_description_test!(
only_dblike_on_db_ops,
"SELECT count() FROM table WHERE id IN (%s, %s)",
"http.client",
""
);
// `cache.*` and `db.redis` spans: the command is kept (upper-cased) and all arguments collapse
// into a single `*`. Each case is also run with a lower-cased description.
span_description_test_with_lowercase!(
cache,
cache_lower,
"GET abc:12:{def}:{34}:{fg56}:EAB38:zookeeper",
"cache.get_item",
"GET *"
);
span_description_test_with_lowercase!(
redis_set,
redis_set_lower,
"SET mykey myvalue",
"db.redis",
"SET *"
);
span_description_test_with_lowercase!(
redis_set_quoted,
redis_set_quoted_lower,
r#"SET mykey 'multi: part, value'"#,
"db.redis",
"SET *"
);
span_description_test_with_lowercase!(
redis_whitespace,
redis_whitespace_lower,
" GET asdf:123",
"db.redis",
"GET *"
);
span_description_test_with_lowercase!(
redis_no_args,
redis_no_args_lower,
"EXEC",
"db.redis",
"EXEC"
);
// Input that is not a known command is replaced entirely.
span_description_test_with_lowercase!(
redis_invalid,
redis_invalid_lower,
"What a beautiful day!",
"db.redis",
"*"
);
span_description_test_with_lowercase!(
redis_long_command,
redis_long_command_lower,
"ACL SETUSER jane",
"db.redis",
"ACL SETUSER *"
);
span_description_test!(
nothing_cache,
"abc-dontscrubme-meneither:stillno:ohplsstop",
"cache.get_item",
"*"
);
// `resource.*` spans: hosts lose their subdomains, uncommon directory segments become `*`,
// and identifiers inside the file name are replaced while the extension is kept.
span_description_test!(
resource_script,
"https://example.com/static/chunks/vendors-node_modules_somemodule_v1.2.3_mini-dist_index_js-client_dist-6c733292-f3cd-11ed-a05b-0242ac120003-0dc369dcf3d311eda05b0242ac120003.[hash].abcd1234.chunk.js-0242ac120003.map",
"resource.script",
"https://example.com/static/chunks/*.map"
);
span_description_test!(
resource_script_numeric_filename,
"https://example.com/static/chunks/09876543211234567890",
"resource.script",
"https://example.com/static/chunks/*"
);
span_description_test!(
resource_next_chunks,
"/_next/static/chunks/12345-abcdef0123456789.js",
"resource.script",
"/_next/static/chunks/*-*.js"
);
span_description_test!(
resource_next_media,
"/_next/static/media/Some_Font-Bold.0123abcd.woff2",
"resource.css",
"/_next/static/media/Some_Font-Bold.*.woff2"
);
span_description_test!(
resource_css,
"https://example.com/assets/dark_high_contrast-764fa7c8-f3cd-11ed-a05b-0242ac120003.css",
"resource.css",
"https://example.com/assets/dark_high_contrast-*.css"
);
span_description_test!(
integer_in_resource,
"https://example.com/assets/this_is-a_good_resource-123-scrub_me.js",
"resource.css",
"https://example.com/assets/*.js"
);
// Query strings are dropped.
span_description_test!(
resource_query_params,
"/organization-avatar/123/?s=120",
"resource.img",
"/*/"
);
// For `resource.img` the file base name is always `*`; for scripts it is only scrubbed.
span_description_test!(
resource_query_params2,
"https://data.domain.com/data/guide123.gif?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
"resource.img",
"https://*.domain.com/data/*.gif"
);
span_description_test!(
resource_query_params2_script,
"https://data.domain.com/data/guide123.js?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
"resource.script",
"https://*.domain.com/data/guide*.js"
);
span_description_test!(
resource_no_ids,
"https://data.domain.com/js/guide.js",
"resource.script",
"https://*.domain.com/js/guide.js"
);
span_description_test!(
resource_no_ids_img_known_segment,
"https://data.domain.com/data/guide.gif",
"resource.img",
"https://*.domain.com/data/*.gif"
);
span_description_test!(
resource_no_ids_img,
"https://data.domain.com/something/guide.gif",
"resource.img",
"https://*.domain.com/*/*.gif"
);
span_description_test!(
resource_webpack,
"https://domain.com/path/to/app-1f90d5.f012d11690e188c96fe6.js",
"resource.js",
"https://domain.com/*/app-*.*.js"
);
// Relative paths (no leading `/`) stay relative in the output.
span_description_test!(
resource_vite,
"webroot/assets/Profile-73f6525d.js",
"resource.js",
"*/assets/Profile-*.js"
);
span_description_test!(
resource_vite_css,
"webroot/assets/Shop-1aff80f7.css",
"resource.css",
"*/assets/Shop-*.css"
);
span_description_test!(
chrome_extension,
"chrome-extension://begnopegbbhjeeiganiajffnalhlkkjb/img/assets/icon-10k.svg",
"resource.other",
"browser-extension://*"
);
span_description_test!(
urlencoded_path_segments,
"https://some.domain.com/embed/%2Fembed%2Fdashboards%2F20%3FSlug%3Dsomeone%*hide_title%3Dtrue",
"resource.iframe",
"https://*.domain.com/*/*"
);
span_description_test!(
random_string1,
"https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
"resource.img",
"https://*.domain.com/*/*"
);
span_description_test!(
random_string1_script,
"https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
"resource.script",
"https://*.domain.com/*/foo-*"
);
span_description_test!(
random_string2,
"http://domain.com/fy2XSqBMqkEm_qZZH3RrzvBTKg4/qltdXIJWTF_cuwt3uKmcwWBc1DM/z1a--BVsUI_oyUjJR12pDBcOIn5.dom.jsonp",
"resource.script",
"http://domain.com/*/*.jsonp"
);
span_description_test!(
random_string3,
"jkhdkkncnoglghljlkmcimlnlhkeamab/123.css",
"resource.link",
"*/*.css"
);
// `*.load` spans: the description is passed through unchanged.
span_description_test!(
ui_load,
"ListAppViewController",
"ui.load",
"ListAppViewController"
);
span_description_test!(
contentprovider_load,
"io.sentry.android.core.SentryPerformanceProvider.onCreate",
"contentprovider.load",
"io.sentry.android.core.SentryPerformanceProvider.onCreate"
);
span_description_test!(
application_load,
"io.sentry.samples.android.MyApplication.onCreate",
"application.load",
"io.sentry.samples.android.MyApplication.onCreate"
);
span_description_test!(
activity_load,
"io.sentry.samples.android.MainActivity.onCreate",
"activity.load",
"io.sentry.samples.android.MainActivity.onCreate"
);
// `file.*` spans: only the file extension is kept, if there is a usable one.
span_description_test!(
span_description_file_write_keep_extension_only,
"data.data (42 KB)",
"file.write",
"*.data"
);
span_description_test!(
span_description_file_read_keep_extension_only,
"Info.plist",
"file.read",
"*.plist"
);
span_description_test!(
span_description_file_with_no_extension,
"somefilenamewithnoextension",
"file.read",
"*"
);
span_description_test!(
span_description_file_extension_with_numbers_only,
"backup.2024041101",
"file.read",
"*"
);
// Further `resource.*` cases: fragments, query strings, unusual extensions, embedded data
// URLs, path parameters, and descriptions without a scheme.
span_description_test!(
resource_url_with_fragment,
"https://data.domain.com/data/guide123.gif#url=someotherurl",
"resource.img",
"https://*.domain.com/data/*.gif"
);
span_description_test!(
resource_script_with_no_extension,
"https://www.domain.com/page?id=1234567890",
"resource.script",
"https://*.domain.com/page"
);
span_description_test!(
resource_script_with_no_domain,
"/page.js?action=name",
"resource.script",
"/page.js"
);
span_description_test!(
resource_script_with_no_domain_no_extension,
"/page?action=name",
"resource.script",
"/page"
);
// Overlong extensions become `*`; extensions are cut at the first non-alphanumeric character.
span_description_test!(
resource_script_with_long_extension,
"/path/to/file.thisismycustomfileextension2000",
"resource.script",
"/*/file.*"
);
span_description_test!(
resource_script_with_long_suffix,
"/path/to/file.js~ri~some-_-1,,thing-_-words%2Fhere~ri~",
"resource.script",
"/*/file.js"
);
span_description_test!(
resource_script_with_tilde_extension,
"/path/to/file.~~",
"resource.script",
"/*/file"
);
span_description_test!(
resource_img_extension,
"http://domain.com/something.123",
"resource.img",
"http://domain.com/*.*"
);
span_description_test!(
resource_img_embedded,
"data:image/svg+xml;base64,PHN2ZyB4bW",
"resource.img",
"data:image/svg+xml"
);
// `db` spans whose description looks like a MongoDB query or a JSON object are not scrubbed.
span_description_test!(
db_category_with_mongodb_query,
"find({some_id:1234567890},{limit:100})",
"db",
""
);
span_description_test!(db_category_with_not_sql, "{someField:someValue}", "db", "");
span_description_test!(
resource_img_semi_colon,
"http://www.foo.com/path/to/resource;param1=test;param2=ing",
"resource.img",
"http://*.foo.com/*/*"
);
span_description_test!(
resource_img_comma_with_extension,
"https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.jpg",
"resource.img",
"https://example.org/*/media/*/*.jpg"
);
span_description_test!(
resource_script_comma_with_extension,
"https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.js",
"resource.script",
"https://example.org/*/media/*/weird-stuff-*-*-*.js"
);
span_description_test!(
resource_img_path_with_comma,
"/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
"resource.img",
"/*/*"
);
span_description_test!(
resource_script_path_with_comma,
"/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
"resource.script",
"/*/*"
);
span_description_test!(
resource_script_random_path_only,
"/ERs-sUsu3/wd4/LyMTWg/Ot1Om4m8cu3p7a/QkJWAQ/FSYL/GBlxb3kB",
"resource.script",
"/*/*"
);
span_description_test!(
resource_script_normalize_domain,
"https://sub.sub.sub.domain.com/resource.js",
"resource.script",
"https://*.domain.com/resource.js"
);
span_description_test!(
resource_script_extension_in_segment,
"https://domain.com/foo.bar/resource.js",
"resource.script",
"https://domain.com/*/resource.js"
);
// Without a scheme the host is parsed as a relative path segment and scrubbed like one.
span_description_test!(
resource_script_missing_scheme,
"domain.com/foo.bar/resource.js",
"resource.script",
"*/resource.js"
);
span_description_test!(
resource_script_missing_scheme_integer_id,
"domain.com/zero-length-00",
"resource.script",
"*/zero-length-*"
);
// Prisma `db` spans are passed through unchanged.
span_description_test!(db_prisma, "User find", "db.sql.prisma", "User find");
// `function` spans: plain identifiers are kept, hex-like and UUID-like parts become `*`.
span_description_test!(
function_python,
"sentry.event_manager.assign_event_to_group",
"function",
"sentry.event_manager.assign_event_to_group"
);
span_description_test!(
function_rust,
"symbolicator_native::symbolication::symbolicate::symbolicate",
"function",
"symbolicator_native::symbolication::symbolicate::symbolicate"
);
span_description_test!(
function_with_hex,
"symbolicator_native::symbolication::symbolicate::deadbeef",
"function",
"symbolicator_native::symbolication::symbolicate::*"
);
span_description_test!(
function_with_uuid,
"symbolicator_native::symbolication::fb37f08422034ee985e9fc553ef27e6e::symbolicate",
"function",
"symbolicator_native::symbolication::*::symbolicate"
);
#[test]
fn informed_sql_parser() {
    // With `db.system` set to `mysql`, the double-quoted token is scrubbed to `%s`.
    let payload = r#"
{
"description": "SELECT \"not an identifier\"",
"span_id": "bd2eb23da2beb459",
"start_timestamp": 1597976393.4619668,
"timestamp": 1597976393.4718769,
"trace_id": "ff62a8b040f340bda5d830223def1d81",
"op": "db",
"data": {"db.system": "mysql"}
}
"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    assert_eq!(description.as_deref(), Some("SELECT %s"));
}
#[test]
fn active_record() {
    // ActiveRecord spans without `db.system` are not scrubbed at all.
    let payload = r#"{
"description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
"op": "db.sql.activerecord"
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    assert!(description.is_none());
}
#[test]
fn active_record_with_db_system() {
    // Once `db.system` is present, ActiveRecord spans go through the regular SQL scrubber.
    let payload = r#"{
"description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
"op": "db.sql.activerecord",
"data": {
"db.system": "mysql"
}
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    assert_eq!(description.as_deref(), Some("SELECT a FROM b"));
}
#[test]
fn redis_with_db_system() {
    // A plain `db` op is treated as Redis when `db.system` says so.
    let payload = r#"{
"description": "del myveryrandomkey:123Xalsdkxfhn",
"op": "db",
"data": {
"db.system": "redis"
}
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    assert_eq!(description.as_deref(), Some("DEL *"));
}
#[test]
fn core_data() {
    // Core Data transaction summaries have their counts replaced by `*`.
    let payload = r#"{
"description": "INSERTED 1 'UAEventData'",
"op": "db.sql.transaction",
"origin": "auto.db.core_data"
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    assert_eq!(description.as_deref(), Some("INSERTED * 'UAEventData'"));
}
#[test]
fn multiple_core_data() {
    // Every count in a multi-part Core Data summary is replaced.
    let payload = r#"{
"description": "UPDATED 1 'QueuedRequest', DELETED 1 'QueuedRequest'",
"op": "db.sql.transaction",
"origin": "auto.db.core_data"
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    let expected = "UPDATED * 'QueuedRequest', DELETED * 'QueuedRequest'";
    assert_eq!(description.as_deref(), Some(expected));
}
#[test]
fn mongodb_scrubbing() {
    // Values are replaced by `?`, while the operation key keeps the collection name.
    let payload = r#"{
"description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
"op": "db",
"data": {
"db.system": "mongodb",
"db.operation": "find",
"db.collection.name": "documents"
}
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    let expected = r#"{"find":"documents","foo":"?"}"#;
    assert_eq!(description.as_deref(), Some(expected));
}
#[test]
fn mongodb_with_legacy_collection_property() {
    // The collection name may also be supplied through `db.mongodb.collection`.
    let payload = r#"{
"description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
"op": "db",
"data": {
"db.system": "mongodb",
"db.operation": "find",
"db.mongodb.collection": "documents"
}
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    let expected = r#"{"find":"documents","foo":"?"}"#;
    assert_eq!(description.as_deref(), Some(expected));
}
#[test]
fn ui_interaction_with_component_name() {
    // For UI interaction spans the component name replaces the selector-like description.
    let payload = r#"{
"description": "input.app-asdfasfg.asdfasdf[type=\"range\"][name=\"replay-timeline\"]",
"op": "ui.interaction.click",
"data": {
"ui.component_name": "my-component-name"
}
}"#;
    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();
    let (description, _parsed_sql) = scrub_span_description(span, &[]);
    assert_eq!(description.as_deref(), Some("my-component-name"));
}
#[test]
fn scrub_allowed_host() {
    // Each case is (URL, allowed hosts, expected scrubbed URL): hosts on the allow list are
    // kept verbatim, whether they are domains, IPv4 or IPv6 addresses.
    let cases = [
        (
            "https://foo.bar.internal/api/v1/submit",
            ["foo.bar.internal".to_string()],
            "https://foo.bar.internal",
        ),
        (
            "http://192.168.1.1:3000",
            ["192.168.1.1".to_string()],
            "http://192.168.1.1:3000",
        ),
        (
            "http://[1fff:0:a88:85a3::ac1f]:8001/foo",
            ["[1fff:0:a88:85a3::ac1f]".to_string()],
            "http://[1fff:0:a88:85a3::ac1f]:8001",
        ),
    ];

    for (url, allowed_hosts, expected) in cases {
        let payload = format!(
            r#"{{
"description": "POST {}",
"span_id": "bd2eb23da2beb459",
"start_timestamp": 1597976393.4619668,
"timestamp": 1597976393.4718769,
"trace_id": "ff62a8b040f340bda5d830223def1d81",
"op": "http.client"
}}
"#,
            url,
        );
        let mut annotated = Annotated::<Span>::from_json(&payload).unwrap();
        let span = annotated.value_mut().as_mut().unwrap();
        let (description, _parsed_sql) = scrub_span_description(span, &allowed_hosts);
        let expected_description = format!("POST {}", expected);
        assert_eq!(
            description.as_deref(),
            Some(expected_description.as_str()),
            "Could not match {url}"
        );
    }
}
// Generates a `#[test]` named `$name` for a `db` span with `db.system` set to `mongodb`.
//
// `$operation_in` and `$collection_in` populate `db.operation` and `db.collection.name`; an
// empty string is emitted as JSON `null` instead. `$description_in` is the raw query, and an
// empty `$expected` means the scrubbed description must be `None`.
macro_rules! mongodb_scrubbing_test {
($name:ident, $description_in:expr, $operation_in:literal, $collection_in:literal, $expected:literal) => {
#[test]
fn $name() {
let json = format!(
r#"
{{
"description": "",
"span_id": "bd2eb23da2beb459",
"start_timestamp": 1597976393.4619668,
"timestamp": 1597976393.4718769,
"trace_id": "ff62a8b040f340bda5d830223def1d81",
"op": "db",
"data": {{
"db.system": "mongodb",
"db.operation": {},
"db.collection.name": {}
}}
}}
"#,
if $operation_in == "" {
"null".to_string()
} else {
format!("\"{}\"", $operation_in)
},
if $collection_in == "" {
"null".to_string()
} else {
format!("\"{}\"", $collection_in)
}
);
let mut span = Annotated::<Span>::from_json(&json).unwrap();
// The description is set after parsing instead of being embedded in the JSON template.
span.value_mut()
.as_mut()
.unwrap()
.description
.set_value(Some($description_in.into()));
let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
if $expected == "" {
assert!(scrubbed.0.is_none());
} else {
assert_eq!($expected, scrubbed.0.unwrap());
}
}
};
}
// MongoDB query scrubbing: values become `?`, arrays become `["..."]`, and the operation key
// always carries the (scrubbed) collection name.
mongodb_scrubbing_test!(
mongodb_basic_query,
r#"{"find": "documents", "showRecordId": true}"#,
"find",
"documents",
r#"{"find":"documents","showRecordId":"?"}"#
);
mongodb_scrubbing_test!(
mongodb_query_with_document_param,
r#"{"find": "documents", "filter": {"foo": "bar"}}"#,
"find",
"documents",
r#"{"filter":{"foo":"?"},"find":"documents"}"#
);
// The operation key is added even if the query itself does not contain it.
mongodb_scrubbing_test!(
mongodb_query_without_operation,
r#"{"filter": {"foo": "bar"}}"#,
"find",
"documents",
r#"{"filter":{"foo":"?"},"find":"documents"}"#
);
// Without both operation and collection in the span data, nothing is scrubbed.
mongodb_scrubbing_test!(
mongodb_without_collection_in_data,
r#"{"find": "documents", "showRecordId": true}"#,
"find",
"",
""
);
mongodb_scrubbing_test!(
mongodb_without_operation_in_data,
r#"{"find": "documents", "showRecordId": true}"#,
"",
"documents",
""
);
// Nesting beyond the recursion limit is cut off with `?`.
mongodb_scrubbing_test!(
mongodb_max_depth,
r#"{"update": "coll", "updates": {"q": {"_id": "1"}, "u": {"$set": {"foo": {"bar": {"baz": "quux"}}}}}}"#,
"update",
"coll",
r#"{"update":"coll","updates":{"q":{"_id":"?"},"u":{"$set":{"foo":"?"}}}}"#
);
mongodb_scrubbing_test!(
mongodb_identifier_in_collection,
r#"{"find": "documents001", "showRecordId": true}"#,
"find",
"documents001",
r#"{"find":"documents{%s}","showRecordId":"?"}"#
);
mongodb_scrubbing_test!(
mongodb_query_with_array,
r#"{"insert": "documents", "documents": [{"foo": "bar"}, {"baz": "quux"}, {"qux": "quuz"}]}"#,
"insert",
"documents",
r#"{"documents":["..."],"insert":"documents"}"#
);
// The `buffer` key is removed entirely.
mongodb_scrubbing_test!(
mongodb_query_with_buffer,
r#"{"insert": "documents", "buffer": {"0": "a", "1": "b", "2": "c"}, "documents": [{"foo": "bar"}]}"#,
"insert",
"documents",
r#"{"documents":["..."],"insert":"documents"}"#
);
}