1mod redis;
3mod resource;
4mod sql;
5use once_cell::sync::Lazy;
6use psl;
7use relay_filter::matches_any_origin;
8use serde_json::Value;
9#[cfg(test)]
10pub use sql::{Mode, scrub_queries};
11
12use relay_event_schema::protocol::Span;
13use std::borrow::Cow;
14use std::net::{Ipv4Addr, Ipv6Addr};
15use std::path::Path;
16use url::{Host, Url};
17
18use crate::regexes::{
19 DB_SQL_TRANSACTION_CORE_DATA_REGEX, DB_SUPABASE_REGEX, FUNCTION_NORMALIZER_REGEX,
20 RESOURCE_NORMALIZER_REGEX,
21};
22use crate::span::TABLE_NAME_REGEX;
23use crate::span::description::redis::matching_redis_command;
24use crate::span::description::resource::COMMON_PATH_SEGMENTS;
25use crate::span::tag_extraction::HTTP_METHOD_EXTRACTOR_REGEX;
26
/// Placeholder base URL used by `scrub_resource` to parse strings that are not
/// complete URLs on their own; the placeholder is removed again from the output.
static DUMMY_BASE_URL: Lazy<Url> = Lazy::new(|| "http://replace_me".parse().unwrap());

/// Maximum byte length of a resource segment; `scrub_resource_segment` replaces
/// longer segments with `*`.
const MAX_SEGMENT_LENGTH: usize = 25;

/// Maximum byte length of a file extension; `scrub_resource_file_extension`
/// replaces longer extensions with `*`.
const MAX_EXTENSION_LENGTH: usize = 10;

/// Domain names that `scrub_domain_name` returns unchanged.
const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"];
40
41pub(crate) fn scrub_span_description(
45 span: &Span,
46 span_allowed_hosts: &[String],
47) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
48 let Some(description) = span.description.as_str() else {
49 return (None, None);
50 };
51
52 let data = span.data.value();
53
54 let db_system = data
55 .and_then(|data| data.db_system.value())
56 .and_then(|system| system.as_str());
57 let span_origin = span.origin.as_str();
58
59 let mut parsed_sql = None;
60 let scrubbed_description = span
61 .op
62 .as_str()
63 .map(|op| op.split_once('.').unwrap_or((op, "")))
64 .and_then(|(op, sub)| match (op, sub) {
65 ("http", _) => scrub_http(description, span_allowed_hosts),
66 ("cache", _) | ("db", "redis") => scrub_redis_keys(description),
67 ("db", _) if db_system == Some("redis") => scrub_redis_keys(description),
68 ("db", _) if db_system == Some("mongodb") => {
69 let command = data
70 .and_then(|data| data.db_operation.value())
71 .and_then(|command| command.as_str());
72
73 let collection = data
74 .and_then(|data| data.db_collection_name.value())
75 .and_then(|collection| collection.as_str());
76
77 if let (Some(command), Some(collection)) = (command, collection) {
78 scrub_mongodb_query(description, command, collection)
79 } else {
80 None
81 }
82 }
83 ("db", sub) => {
84 if sub.contains("clickhouse")
85 || sub.contains("mongodb")
86 || sub.contains("redis")
87 || is_legacy_activerecord(sub, db_system)
88 || is_sql_mongodb(description, db_system)
89 {
90 None
91 } else if span_origin == Some("auto.db.core_data") {
93 scrub_core_data(description)
94 } else if sub.contains("prisma") {
95 Some(description.to_owned())
99 } else if span_origin == Some("auto.db.supabase")
100 && description.starts_with("from(")
101 {
102 scrub_supabase(description)
106 } else {
107 let (scrubbed, mode) = sql::scrub_queries(db_system, description);
108 if let sql::Mode::Parsed(ast) = mode {
109 parsed_sql = Some(ast);
110 }
111 scrubbed
112 }
113 }
114 ("resource", ty) => scrub_resource(ty, description),
115 ("ai", sub) => match sub.split_once('.').unwrap_or((sub, "")) {
116 ("run" | "pipeline", _) => {
117 Some(description.to_owned())
120 }
121 _ => None,
122 },
123 ("ui", "load") => {
124 Some(description.to_owned())
127 }
128 ("ui", sub) if sub.starts_with("interaction.") || sub.starts_with("react.") => data
129 .and_then(|data| data.ui_component_name.value())
130 .and_then(|value| value.as_str())
131 .map(String::from),
132 ("app", _) => {
133 Some(description.to_owned())
137 }
138 ("contentprovider", "load") => {
139 Some(description.to_owned())
144 }
145 ("application", "load") => {
146 Some(description.to_owned())
151 }
152 ("activity", "load") => {
153 Some(description.to_owned())
157 }
158 ("file", _) => scrub_file(description),
159 ("function", _) => scrub_function(description),
160 _ => None,
161 });
162 (scrubbed_description, parsed_sql)
163}
164
/// Returns `true` if the description looks like a MongoDB query or the
/// `db_system` is `"mongodb"`.
///
/// The check on the description is purely textual: it starts with `{` or
/// contains one of `"$`, `({` or `[{`.
fn is_sql_mongodb(description: &str, db_system: Option<&str>) -> bool {
    const MARKERS: [&str; 3] = ["\"$", "({", "[{"];

    if db_system == Some("mongodb") || description.starts_with('{') {
        return true;
    }
    MARKERS.iter().any(|marker| description.contains(*marker))
}
173
/// Returns `true` if no `db_system` is set and the sub-operation mentions
/// `active_record` or `activerecord`.
fn is_legacy_activerecord(sub_op: &str, db_system: Option<&str>) -> bool {
    if db_system.is_some() {
        return false;
    }
    ["active_record", "activerecord"]
        .iter()
        .any(|name| sub_op.contains(*name))
}
178
179fn scrub_core_data(string: &str) -> Option<String> {
180 match DB_SQL_TRANSACTION_CORE_DATA_REGEX.replace_all(string, "*") {
181 Cow::Owned(scrubbed) => Some(scrubbed),
182 Cow::Borrowed(_) => None,
183 }
184}
185
186fn scrub_supabase(string: &str) -> Option<String> {
187 Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into())
188}
189
190fn scrub_http(string: &str, allow_list: &[String]) -> Option<String> {
191 let (method, url) = string.split_once(' ')?;
192 if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) {
193 return None;
194 };
195
196 if url.starts_with("data:image/") {
197 return Some(format!("{method} data:image/*"));
198 }
199
200 let scrubbed = match Url::parse(url) {
201 Ok(url) => {
202 let scheme = url.scheme();
203 let scrubbed_host = url.host().map(|host| scrub_host(host, allow_list));
204 let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
205
206 format!("{method} {scheme}://{domain}")
207 }
208 Err(_) => {
209 format!("{method} *")
210 }
211 };
212
213 Some(scrubbed)
214}
215
216fn scrub_file(description: &str) -> Option<String> {
217 let filename = match description.split_once(' ') {
218 Some((filename, _)) => filename,
219 _ => description,
220 };
221 match Path::new(filename).extension() {
222 Some(extension) => {
223 let ext = scrub_resource_file_extension(extension.to_str()?);
224 if ext != "*" {
225 Some(format!("*.{ext}"))
226 } else {
227 Some("*".to_string())
228 }
229 }
230 _ => Some("*".to_owned()),
231 }
232}
233
234pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> {
251 let allow_list: Vec<_> = allow_list
252 .iter()
253 .map(|origin| origin.as_str().into())
254 .collect();
255
256 if matches_any_origin(Some(host.to_string().as_str()), &allow_list) {
257 return host.to_string().into();
258 }
259
260 match host {
261 Host::Ipv4(ip) => Cow::Borrowed(scrub_ipv4(ip)),
262 Host::Ipv6(ip) => Cow::Borrowed(scrub_ipv6(ip)),
263 Host::Domain(domain) => scrub_domain_name(domain),
264 }
265}
266
/// Scrubs an IPv4 address.
///
/// `127.0.0.1` is kept as is; every other address becomes `*.*.*.*`.
pub fn scrub_ipv4(ip: Ipv4Addr) -> &'static str {
    if ip == Ipv4Addr::LOCALHOST {
        "127.0.0.1"
    } else {
        "*.*.*.*"
    }
}
287
/// Scrubs an IPv6 address.
///
/// `::1` is kept as is; every other address becomes `*:*:*:*:*:*:*:*`.
pub fn scrub_ipv6(ip: Ipv6Addr) -> &'static str {
    if ip == Ipv6Addr::LOCALHOST {
        "::1"
    } else {
        "*:*:*:*:*:*:*:*"
    }
}
305
306pub fn scrub_domain_name(domain: &str) -> Cow<'_, str> {
321 if DOMAIN_ALLOW_LIST.contains(&domain) {
322 return Cow::Borrowed(domain);
323 }
324
325 let parsed_domain = psl::domain(domain.as_bytes());
326
327 let Some(parsed_domain) = parsed_domain else {
328 return Cow::Borrowed(domain);
330 };
331
332 let suffix = parsed_domain.suffix().as_bytes();
333 let Some(second_level_domain) = parsed_domain.as_bytes().strip_suffix(suffix) else {
334 return Cow::Borrowed(domain);
335 };
336
337 let subdomain = domain
338 .as_bytes()
339 .strip_suffix(suffix)
340 .and_then(|s| s.strip_suffix(second_level_domain));
341
342 match subdomain {
343 None | Some(b"") => Cow::Borrowed(domain),
344 Some(_subdomain) => {
345 let scrubbed = [b"*.", second_level_domain, suffix].concat();
346 match String::from_utf8(scrubbed) {
347 Ok(s) => Cow::Owned(s),
348 Err(_) => Cow::Borrowed(domain),
349 }
350 }
351 }
352}
353
/// Joins a host and an optional port as `host:port`.
///
/// Returns an empty string when there is no host (any port is ignored) and
/// the borrowed host when there is no port.
pub fn concatenate_host_and_port(host: Option<&str>, port: Option<u16>) -> Cow<str> {
    let Some(host) = host else {
        return Cow::Borrowed("");
    };

    match port {
        Some(port) => Cow::Owned(format!("{host}:{port}")),
        None => Cow::Borrowed(host),
    }
}
374
375fn scrub_redis_keys(string: &str) -> Option<String> {
376 let string = string.trim();
377 Some(match matching_redis_command(string) {
378 Some(command) => {
379 let mut command = command.to_uppercase();
380 match string.get(command.len()..) {
381 None | Some("") => command,
382 Some(_other) => {
383 command.push_str(" *");
384 command
385 }
386 }
387 }
388 None => "*".to_owned(),
389 })
390}
391
/// How the string given to `scrub_resource` was interpreted as a URL.
enum UrlType {
    /// The string parsed as a URL on its own.
    Full,
    /// The string needed the dummy base URL to parse and starts with `/`.
    Absolute,
    /// The string needed the dummy base URL to parse and does not start with `/`.
    Relative,
}
400
401fn scrub_resource(resource_type: &str, string: &str) -> Option<String> {
403 let (url, ty) = match Url::parse(string) {
404 Ok(url) => (url, UrlType::Full),
405 Err(url::ParseError::RelativeUrlWithoutBase) => {
406 match Url::options().base_url(Some(&DUMMY_BASE_URL)).parse(string) {
408 Ok(url) => (
409 url,
410 if string.starts_with('/') {
411 UrlType::Absolute
412 } else {
413 UrlType::Relative
414 },
415 ),
416 Err(_) => return None,
417 }
418 }
419 Err(_) => {
420 return None;
421 }
422 };
423
424 let formatted = match url.scheme() {
425 "data" => match url.path().split_once(';') {
426 Some((ty, _data)) => format!("data:{ty}"),
427 None => "data:*/*".to_owned(),
428 },
429 "chrome-extension" | "moz-extension" | "ms-browser-extension" => {
430 return Some("browser-extension://*".to_owned());
431 }
432 scheme => {
433 let scrubbed_host = url.host().map(|host| scrub_host(host, &[]));
434 let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
435
436 let segment_count = url.path_segments().map(|s| s.count()).unwrap_or_default();
437 let mut output_segments = vec![];
438 for (i, segment) in url.path_segments().into_iter().flatten().enumerate() {
439 if i + 1 == segment_count {
440 break;
441 }
442 if COMMON_PATH_SEGMENTS.contains(segment) {
443 output_segments.push(segment);
444 } else if output_segments.last().is_none_or(|s| *s != "*") {
445 output_segments.push("*");
447 }
448 }
449
450 let segments = output_segments.join("/");
451
452 let last_segment = url
453 .path_segments()
454 .and_then(|mut s| s.next_back())
455 .unwrap_or_default();
456 let last_segment = scrub_resource_filename(resource_type, last_segment);
457
458 if segments.is_empty() {
459 format!("{scheme}://{domain}/{last_segment}")
460 } else {
461 format!("{scheme}://{domain}/{segments}/{last_segment}")
462 }
463 }
464 };
465
466 let formatted = match ty {
468 UrlType::Full => formatted,
469 UrlType::Absolute => formatted.replace("http://replace_me", ""),
470 UrlType::Relative => formatted.replace("http://replace_me/", ""),
471 };
472
473 Some(formatted)
474}
475
476fn scrub_resource_filename<'a>(ty: &str, path: &'a str) -> Cow<'a, str> {
477 if path.is_empty() {
478 return Cow::Borrowed("");
479 }
480 let (mut basename, mut extension) = path.rsplit_once('.').unwrap_or((path, ""));
481 if extension.contains('/') {
482 basename = path;
484 extension = "";
485 }
486
487 let extension = scrub_resource_file_extension(extension);
488
489 let basename = if ty == "img" {
490 Cow::Borrowed("*")
491 } else {
492 scrub_resource_segment(basename)
493 };
494
495 if extension.is_empty() {
496 basename
497 } else {
498 let mut filename = basename.to_string();
499 filename.push('.');
500 filename.push_str(extension);
501 Cow::Owned(filename)
502 }
503}
504
505fn scrub_resource_segment(segment: &str) -> Cow<str> {
506 let segment = RESOURCE_NORMALIZER_REGEX.replace_all(segment, "$pre*$post");
507
508 if segment.len() > MAX_SEGMENT_LENGTH {
510 return Cow::Borrowed("*");
511 }
512
513 let mut all_alphabetic = true;
514 let mut found_uppercase = false;
515
516 for char in segment.chars() {
518 if !char.is_ascii_alphabetic() {
519 all_alphabetic = false;
520 }
521 if char.is_ascii_uppercase() {
522 found_uppercase = true;
523 }
524 if char.is_numeric() || "&%#=+@".contains(char) {
525 return Cow::Borrowed("*");
526 };
527 }
528
529 if all_alphabetic && found_uppercase {
530 return Cow::Borrowed("*");
532 }
533
534 segment
535}
536
537fn scrub_resource_file_extension(mut extension: &str) -> &str {
538 let mut digits = 0;
540 for (i, byte) in extension.bytes().enumerate() {
541 if byte.is_ascii_digit() {
542 digits += 1;
543 }
544 if digits > 1 {
545 return "*";
547 }
548 if !byte.is_ascii_alphanumeric() {
549 extension = &extension[..i];
550 break;
551 }
552 }
553
554 if extension.len() > MAX_EXTENSION_LENGTH {
555 extension = "*";
556 }
557
558 extension
559}
560
561fn scrub_function(string: &str) -> Option<String> {
562 Some(FUNCTION_NORMALIZER_REGEX.replace_all(string, "*").into())
563}
564
565fn scrub_mongodb_query(query: &str, command: &str, collection: &str) -> Option<String> {
566 let mut query: Value = serde_json::from_str(query).ok()?;
567
568 let root = query.as_object_mut()?;
569
570 root.remove("buffer");
572
573 for value in root.values_mut() {
574 scrub_mongodb_visit_node(value, 3);
575 }
576
577 let scrubbed_collection_name =
578 if let Cow::Owned(s) = TABLE_NAME_REGEX.replace_all(collection, "{%s}") {
579 s
580 } else {
581 collection.to_owned()
582 };
583 root.insert(command.to_owned(), Value::String(scrubbed_collection_name));
584
585 Some(query.to_string())
586}
587
588fn scrub_mongodb_visit_node(value: &mut Value, recursion_limit: usize) {
589 if recursion_limit == 0 {
590 match value {
591 Value::String(str) => {
592 str.clear();
593 str.push('?');
594 }
595 value => *value = Value::String("?".to_owned()),
596 }
597 return;
598 }
599
600 match value {
601 Value::Object(map) => {
602 for value in map.values_mut() {
603 scrub_mongodb_visit_node(value, recursion_limit - 1);
604 }
605 }
606 Value::Array(arr) => {
607 arr.clear();
608 arr.push(Value::String("...".to_owned()));
609 }
610 Value::String(str) => {
611 str.clear();
612 str.push('?');
613 }
614 value => *value = Value::String("?".to_owned()),
615 }
616}
617
618#[cfg(test)]
619mod tests {
620 use super::*;
621 use relay_protocol::Annotated;
622 use similar_asserts::assert_eq;
623
624 macro_rules! span_description_test {
625 ($name:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
630 #[test]
631 fn $name() {
632 let json = format!(
633 r#"
634 {{
635 "description": "",
636 "span_id": "bd2eb23da2beb459",
637 "start_timestamp": 1597976393.4619668,
638 "timestamp": 1597976393.4718769,
639 "trace_id": "ff62a8b040f340bda5d830223def1d81",
640 "op": "{}"
641 }}
642 "#,
643 $op_in
644 );
645
646 let mut span = Annotated::<Span>::from_json(&json).unwrap();
647 span.value_mut()
648 .as_mut()
649 .unwrap()
650 .description
651 .set_value(Some($description_in.into()));
652
653 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
654
655 if $expected == "" {
656 assert!(scrubbed.0.is_none());
657 } else {
658 assert_eq!($expected, scrubbed.0.unwrap());
659 }
660 }
661 };
662 }
663
664 macro_rules! span_description_test_with_lowercase {
665 ($name:ident, $name2:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
666 span_description_test!($name, $description_in, $op_in, $expected);
667 span_description_test!($name2, ($description_in).to_lowercase(), $op_in, $expected);
668 };
669 }
670
671 span_description_test!(empty, "", "http.client", "");
672
673 span_description_test!(
674 only_domain,
675 "GET http://service.io",
676 "http.client",
677 "GET http://service.io"
678 );
679
680 span_description_test!(
681 only_urllike_on_http_ops,
682 "GET https://www.service.io/resources/01234",
683 "http.client",
684 "GET https://*.service.io"
685 );
686
687 span_description_test!(
688 path_ids_end,
689 "GET https://www.service.io/resources/01234",
690 "http.client",
691 "GET https://*.service.io"
692 );
693
694 span_description_test!(
695 path_ids_middle,
696 "GET https://www.service.io/resources/01234/details",
697 "http.client",
698 "GET https://*.service.io"
699 );
700
701 span_description_test!(
702 path_multiple_ids,
703 "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
704 "http.client",
705 "GET https://*.service.io"
706 );
707
708 span_description_test!(
709 localhost,
710 "GET https://localhost/data",
711 "http.client",
712 "GET https://localhost"
713 );
714
715 span_description_test!(
716 loopback,
717 "GET https://127.0.0.1/data",
718 "http.client",
719 "GET https://127.0.0.1"
720 );
721
722 span_description_test!(
723 ip_address,
724 "GET https://8.8.8.8/data",
725 "http.client",
726 "GET https://*.*.*.*"
727 );
728
729 span_description_test!(
730 path_md5_hashes,
731 "GET /clients/563712f9722fb0996ac8f3905b40786f/project/01234",
732 "http.client",
733 "GET *"
734 );
735
736 span_description_test!(
737 path_sha_hashes,
738 "GET /clients/403926033d001b5279df37cbbe5287b7c7c267fa/project/01234",
739 "http.client",
740 "GET *"
741 );
742
743 span_description_test!(
744 hex,
745 "GET /shop/de/f43/beef/3D6/my-beef",
746 "http.client",
747 "GET *"
748 );
749
750 span_description_test!(
751 path_uuids,
752 "GET /clients/8ff81d74-606d-4c75-ac5e-cee65cbbc866/project/01234",
753 "http.client",
754 "GET *"
755 );
756
757 span_description_test!(
758 data_images,
759 "GET ",
760 "http.client",
761 "GET data:image/*"
762 );
763
764 span_description_test!(
765 simple_cctld,
766 "GET http://bbc.co.uk",
767 "http.client",
768 "GET http://bbc.co.uk"
769 );
770
771 span_description_test!(
772 longer_cctld,
773 "GET http://www.radio1.bbc.co.uk",
774 "http.client",
775 "GET http://*.bbc.co.uk"
776 );
777
778 span_description_test!(
779 complicated_tld,
780 "GET https://application.www.xn--85x722f.xn--55qx5d.cn",
781 "http.client",
782 "GET https://*.xn--85x722f.xn--55qx5d.cn"
783 );
784
785 span_description_test!(
786 only_dblike_on_db_ops,
787 "SELECT count() FROM table WHERE id IN (%s, %s)",
788 "http.client",
789 ""
790 );
791
792 span_description_test_with_lowercase!(
793 cache,
794 cache_lower,
795 "GET abc:12:{def}:{34}:{fg56}:EAB38:zookeeper",
796 "cache.get_item",
797 "GET *"
798 );
799
800 span_description_test_with_lowercase!(
801 redis_set,
802 redis_set_lower,
803 "SET mykey myvalue",
804 "db.redis",
805 "SET *"
806 );
807
808 span_description_test_with_lowercase!(
809 redis_set_quoted,
810 redis_set_quoted_lower,
811 r#"SET mykey 'multi: part, value'"#,
812 "db.redis",
813 "SET *"
814 );
815
816 span_description_test_with_lowercase!(
817 redis_whitespace,
818 redis_whitespace_lower,
819 " GET asdf:123",
820 "db.redis",
821 "GET *"
822 );
823
824 span_description_test_with_lowercase!(
825 redis_no_args,
826 redis_no_args_lower,
827 "EXEC",
828 "db.redis",
829 "EXEC"
830 );
831
832 span_description_test_with_lowercase!(
833 redis_invalid,
834 redis_invalid_lower,
835 "What a beautiful day!",
836 "db.redis",
837 "*"
838 );
839
840 span_description_test_with_lowercase!(
841 redis_long_command,
842 redis_long_command_lower,
843 "ACL SETUSER jane",
844 "db.redis",
845 "ACL SETUSER *"
846 );
847
848 span_description_test!(
849 nothing_cache,
850 "abc-dontscrubme-meneither:stillno:ohplsstop",
851 "cache.get_item",
852 "*"
853 );
854
855 span_description_test!(
856 resource_script,
857 "https://example.com/static/chunks/vendors-node_modules_somemodule_v1.2.3_mini-dist_index_js-client_dist-6c733292-f3cd-11ed-a05b-0242ac120003-0dc369dcf3d311eda05b0242ac120003.[hash].abcd1234.chunk.js-0242ac120003.map",
858 "resource.script",
859 "https://example.com/static/chunks/*.map"
860 );
861
862 span_description_test!(
863 resource_script_numeric_filename,
864 "https://example.com/static/chunks/09876543211234567890",
865 "resource.script",
866 "https://example.com/static/chunks/*"
867 );
868
869 span_description_test!(
870 resource_next_chunks,
871 "/_next/static/chunks/12345-abcdef0123456789.js",
872 "resource.script",
873 "/_next/static/chunks/*-*.js"
874 );
875
876 span_description_test!(
877 resource_next_media,
878 "/_next/static/media/Some_Font-Bold.0123abcd.woff2",
879 "resource.css",
880 "/_next/static/media/Some_Font-Bold.*.woff2"
881 );
882
883 span_description_test!(
884 resource_css,
885 "https://example.com/assets/dark_high_contrast-764fa7c8-f3cd-11ed-a05b-0242ac120003.css",
886 "resource.css",
887 "https://example.com/assets/dark_high_contrast-*.css"
888 );
889
890 span_description_test!(
891 integer_in_resource,
892 "https://example.com/assets/this_is-a_good_resource-123-scrub_me.js",
893 "resource.css",
894 "https://example.com/assets/*.js"
895 );
896
897 span_description_test!(
898 resource_query_params,
899 "/organization-avatar/123/?s=120",
900 "resource.img",
901 "/*/"
902 );
903
904 span_description_test!(
905 resource_query_params2,
906 "https://data.domain.com/data/guide123.gif?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
907 "resource.img",
908 "https://*.domain.com/data/*.gif"
909 );
910
911 span_description_test!(
912 resource_query_params2_script,
913 "https://data.domain.com/data/guide123.js?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
914 "resource.script",
915 "https://*.domain.com/data/guide*.js"
916 );
917
918 span_description_test!(
919 resource_no_ids,
920 "https://data.domain.com/js/guide.js",
921 "resource.script",
922 "https://*.domain.com/js/guide.js"
923 );
924
925 span_description_test!(
926 resource_no_ids_img_known_segment,
927 "https://data.domain.com/data/guide.gif",
928 "resource.img",
929 "https://*.domain.com/data/*.gif"
930 );
931
932 span_description_test!(
933 resource_no_ids_img,
934 "https://data.domain.com/something/guide.gif",
935 "resource.img",
936 "https://*.domain.com/*/*.gif"
937 );
938
939 span_description_test!(
940 resource_webpack,
941 "https://domain.com/path/to/app-1f90d5.f012d11690e188c96fe6.js",
942 "resource.js",
943 "https://domain.com/*/app-*.*.js"
944 );
945
946 span_description_test!(
947 resource_vite,
948 "webroot/assets/Profile-73f6525d.js",
949 "resource.js",
950 "*/assets/Profile-*.js"
951 );
952
953 span_description_test!(
954 resource_vite_css,
955 "webroot/assets/Shop-1aff80f7.css",
956 "resource.css",
957 "*/assets/Shop-*.css"
958 );
959
960 span_description_test!(
961 chrome_extension,
962 "chrome-extension://begnopegbbhjeeiganiajffnalhlkkjb/img/assets/icon-10k.svg",
963 "resource.other",
964 "browser-extension://*"
965 );
966
967 span_description_test!(
968 urlencoded_path_segments,
969 "https://some.domain.com/embed/%2Fembed%2Fdashboards%2F20%3FSlug%3Dsomeone%*hide_title%3Dtrue",
970 "resource.iframe",
971 "https://*.domain.com/*/*"
972 );
973
974 span_description_test!(
975 random_string1,
976 "https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
977 "resource.img",
978 "https://*.domain.com/*/*"
979 );
980
981 span_description_test!(
982 random_string1_script,
983 "https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
984 "resource.script",
985 "https://*.domain.com/*/foo-*"
986 );
987
988 span_description_test!(
989 random_string2,
990 "http://domain.com/fy2XSqBMqkEm_qZZH3RrzvBTKg4/qltdXIJWTF_cuwt3uKmcwWBc1DM/z1a--BVsUI_oyUjJR12pDBcOIn5.dom.jsonp",
991 "resource.script",
992 "http://domain.com/*/*.jsonp"
993 );
994
995 span_description_test!(
996 random_string3,
997 "jkhdkkncnoglghljlkmcimlnlhkeamab/123.css",
998 "resource.link",
999 "*/*.css"
1000 );
1001
1002 span_description_test!(
1003 ui_load,
1004 "ListAppViewController",
1005 "ui.load",
1006 "ListAppViewController"
1007 );
1008
1009 span_description_test!(
1010 contentprovider_load,
1011 "io.sentry.android.core.SentryPerformanceProvider.onCreate",
1012 "contentprovider.load",
1013 "io.sentry.android.core.SentryPerformanceProvider.onCreate"
1014 );
1015
1016 span_description_test!(
1017 application_load,
1018 "io.sentry.samples.android.MyApplication.onCreate",
1019 "application.load",
1020 "io.sentry.samples.android.MyApplication.onCreate"
1021 );
1022
1023 span_description_test!(
1024 activity_load,
1025 "io.sentry.samples.android.MainActivity.onCreate",
1026 "activity.load",
1027 "io.sentry.samples.android.MainActivity.onCreate"
1028 );
1029
1030 span_description_test!(
1031 span_description_file_write_keep_extension_only,
1032 "data.data (42 KB)",
1033 "file.write",
1034 "*.data"
1035 );
1036
1037 span_description_test!(
1038 span_description_file_read_keep_extension_only,
1039 "Info.plist",
1040 "file.read",
1041 "*.plist"
1042 );
1043
1044 span_description_test!(
1045 span_description_file_with_no_extension,
1046 "somefilenamewithnoextension",
1047 "file.read",
1048 "*"
1049 );
1050
1051 span_description_test!(
1052 span_description_file_extension_with_numbers_only,
1053 "backup.2024041101",
1054 "file.read",
1055 "*"
1056 );
1057
1058 span_description_test!(
1059 resource_url_with_fragment,
1060 "https://data.domain.com/data/guide123.gif#url=someotherurl",
1061 "resource.img",
1062 "https://*.domain.com/data/*.gif"
1063 );
1064
1065 span_description_test!(
1066 resource_script_with_no_extension,
1067 "https://www.domain.com/page?id=1234567890",
1068 "resource.script",
1069 "https://*.domain.com/page"
1070 );
1071
1072 span_description_test!(
1073 resource_script_with_no_domain,
1074 "/page.js?action=name",
1075 "resource.script",
1076 "/page.js"
1077 );
1078
1079 span_description_test!(
1080 resource_script_with_no_domain_no_extension,
1081 "/page?action=name",
1082 "resource.script",
1083 "/page"
1084 );
1085
1086 span_description_test!(
1087 resource_script_with_long_extension,
1088 "/path/to/file.thisismycustomfileextension2000",
1089 "resource.script",
1090 "/*/file.*"
1091 );
1092
1093 span_description_test!(
1094 resource_script_with_long_suffix,
1095 "/path/to/file.js~ri~some-_-1,,thing-_-words%2Fhere~ri~",
1096 "resource.script",
1097 "/*/file.js"
1098 );
1099
1100 span_description_test!(
1101 resource_script_with_tilde_extension,
1102 "/path/to/file.~~",
1103 "resource.script",
1104 "/*/file"
1105 );
1106
1107 span_description_test!(
1108 resource_img_extension,
1109 "http://domain.com/something.123",
1110 "resource.img",
1111 "http://domain.com/*.*"
1112 );
1113
1114 span_description_test!(
1115 resource_img_embedded,
1116 "",
1117 "resource.img",
1118 "data:image/svg+xml"
1119 );
1120
1121 span_description_test!(
1122 db_category_with_mongodb_query,
1123 "find({some_id:1234567890},{limit:100})",
1124 "db",
1125 ""
1126 );
1127
1128 span_description_test!(db_category_with_not_sql, "{someField:someValue}", "db", "");
1129
1130 span_description_test!(
1131 resource_img_semi_colon,
1132 "http://www.foo.com/path/to/resource;param1=test;param2=ing",
1133 "resource.img",
1134 "http://*.foo.com/*/*"
1135 );
1136
1137 span_description_test!(
1138 resource_img_comma_with_extension,
1139 "https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.jpg",
1140 "resource.img",
1141 "https://example.org/*/media/*/*.jpg"
1142 );
1143
1144 span_description_test!(
1145 resource_script_comma_with_extension,
1146 "https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.js",
1147 "resource.script",
1148 "https://example.org/*/media/*/weird-stuff-*-*-*.js"
1149 );
1150
1151 span_description_test!(
1152 resource_img_path_with_comma,
1153 "/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
1154 "resource.img",
1155 "/*/*"
1156 );
1157
1158 span_description_test!(
1159 resource_script_path_with_comma,
1160 "/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
1161 "resource.script",
1162 "/*/*"
1163 );
1164
1165 span_description_test!(
1166 resource_script_random_path_only,
1167 "/ERs-sUsu3/wd4/LyMTWg/Ot1Om4m8cu3p7a/QkJWAQ/FSYL/GBlxb3kB",
1168 "resource.script",
1169 "/*/*"
1170 );
1171
1172 span_description_test!(
1173 resource_script_normalize_domain,
1174 "https://sub.sub.sub.domain.com/resource.js",
1175 "resource.script",
1176 "https://*.domain.com/resource.js"
1177 );
1178
1179 span_description_test!(
1180 resource_script_extension_in_segment,
1181 "https://domain.com/foo.bar/resource.js",
1182 "resource.script",
1183 "https://domain.com/*/resource.js"
1184 );
1185
1186 span_description_test!(
1187 resource_script_missing_scheme,
1188 "domain.com/foo.bar/resource.js",
1189 "resource.script",
1190 "*/resource.js"
1191 );
1192
1193 span_description_test!(
1194 resource_script_missing_scheme_integer_id,
1195 "domain.com/zero-length-00",
1196 "resource.script",
1197 "*/zero-length-*"
1198 );
1199
1200 span_description_test!(db_prisma, "User find", "db.sql.prisma", "User find");
1201
1202 span_description_test!(
1203 function_python,
1204 "sentry.event_manager.assign_event_to_group",
1205 "function",
1206 "sentry.event_manager.assign_event_to_group"
1207 );
1208
1209 span_description_test!(
1210 function_rust,
1211 "symbolicator_native::symbolication::symbolicate::symbolicate",
1212 "function",
1213 "symbolicator_native::symbolication::symbolicate::symbolicate"
1214 );
1215
1216 span_description_test!(
1217 function_with_hex,
1218 "symbolicator_native::symbolication::symbolicate::deadbeef",
1219 "function",
1220 "symbolicator_native::symbolication::symbolicate::*"
1221 );
1222
1223 span_description_test!(
1224 function_with_uuid,
1225 "symbolicator_native::symbolication::fb37f08422034ee985e9fc553ef27e6e::symbolicate",
1226 "function",
1227 "symbolicator_native::symbolication::*::symbolicate"
1228 );
1229
1230 #[test]
1231 fn informed_sql_parser() {
1232 let json = r#"
1233 {
1234 "description": "SELECT \"not an identifier\"",
1235 "span_id": "bd2eb23da2beb459",
1236 "start_timestamp": 1597976393.4619668,
1237 "timestamp": 1597976393.4718769,
1238 "trace_id": "ff62a8b040f340bda5d830223def1d81",
1239 "op": "db",
1240 "data": {"db.system": "mysql"}
1241 }
1242 "#;
1243
1244 let mut span = Annotated::<Span>::from_json(json).unwrap();
1245 let span = span.value_mut().as_mut().unwrap();
1246 let scrubbed = scrub_span_description(span, &[]);
1247 assert_eq!(scrubbed.0.as_deref(), Some("SELECT %s"));
1248 }
1249
1250 #[test]
1251 fn active_record() {
1252 let json = r#"{
1253 "description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
1254 "op": "db.sql.activerecord"
1255 }"#;
1256
1257 let mut span = Annotated::<Span>::from_json(json).unwrap();
1258
1259 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1260
1261 assert!(scrubbed.0.is_none());
1263 }
1264
1265 #[test]
1266 fn active_record_with_db_system() {
1267 let json = r#"{
1268 "description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
1269 "op": "db.sql.activerecord",
1270 "data": {
1271 "db.system": "mysql"
1272 }
1273 }"#;
1274
1275 let mut span = Annotated::<Span>::from_json(json).unwrap();
1276
1277 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1278
1279 assert_eq!(scrubbed.0.as_deref(), Some("SELECT a FROM b"));
1281 }
1282
1283 #[test]
1284 fn redis_with_db_system() {
1285 let json = r#"{
1286 "description": "del myveryrandomkey:123Xalsdkxfhn",
1287 "op": "db",
1288 "data": {
1289 "db.system": "redis"
1290 }
1291 }"#;
1292
1293 let mut span = Annotated::<Span>::from_json(json).unwrap();
1294
1295 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1296
1297 assert_eq!(scrubbed.0.as_deref(), Some("DEL *"));
1298 }
1299
1300 #[test]
1301 fn core_data() {
1302 let json = r#"{
1303 "description": "INSERTED 1 'UAEventData'",
1304 "op": "db.sql.transaction",
1305 "origin": "auto.db.core_data"
1306 }"#;
1307
1308 let mut span = Annotated::<Span>::from_json(json).unwrap();
1309
1310 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1311
1312 assert_eq!(scrubbed.0.as_deref(), Some("INSERTED * 'UAEventData'"));
1313 }
1314
1315 #[test]
1316 fn multiple_core_data() {
1317 let json = r#"{
1318 "description": "UPDATED 1 'QueuedRequest', DELETED 1 'QueuedRequest'",
1319 "op": "db.sql.transaction",
1320 "origin": "auto.db.core_data"
1321 }"#;
1322
1323 let mut span = Annotated::<Span>::from_json(json).unwrap();
1324
1325 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1326
1327 assert_eq!(
1328 scrubbed.0.as_deref(),
1329 Some("UPDATED * 'QueuedRequest', DELETED * 'QueuedRequest'")
1330 );
1331 }
1332
/// A MongoDB query description keeps the operation/collection entry and has
/// its remaining scalar value replaced with `?`.
#[test]
fn mongodb_scrubbing() {
    let payload = r#"{
        "description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
        "op": "db",
        "data": {
            "db.system": "mongodb",
            "db.operation": "find",
            "db.collection.name": "documents"
        }
    }"#;
    let annotated = Annotated::<Span>::from_json(payload).unwrap();

    let (description, _parsed_sql) = scrub_span_description(annotated.value().unwrap(), &[]);

    let expected = r#"{"find":"documents","foo":"?"}"#;
    assert_eq!(description.as_deref(), Some(expected));
}
1354
/// Same query as `mongodb_scrubbing`, but the collection name is supplied via
/// the `db.mongodb.collection` data key instead of `db.collection.name`; the
/// scrubbed description is expected to be identical.
#[test]
fn mongodb_with_legacy_collection_property() {
    let payload = r#"{
        "description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
        "op": "db",
        "data": {
            "db.system": "mongodb",
            "db.operation": "find",
            "db.mongodb.collection": "documents"
        }
    }"#;
    let annotated = Annotated::<Span>::from_json(payload).unwrap();

    let (description, _parsed_sql) = scrub_span_description(annotated.value().unwrap(), &[]);

    let expected = r#"{"find":"documents","foo":"?"}"#;
    assert_eq!(description.as_deref(), Some(expected));
}
1376
/// For a `ui.interaction.click` span carrying a `ui.component_name` data
/// attribute, the scrubbed description is that component name rather than the
/// raw selector in the description.
#[test]
fn ui_interaction_with_component_name() {
    let payload = r#"{
        "description": "input.app-asdfasfg.asdfasdf[type=\"range\"][name=\"replay-timeline\"]",
        "op": "ui.interaction.click",
        "data": {
            "ui.component_name": "my-component-name"
        }
    }"#;
    let annotated = Annotated::<Span>::from_json(payload).unwrap();

    let (description, _parsed_sql) = scrub_span_description(annotated.value().unwrap(), &[]);

    assert_eq!(description.as_deref(), Some("my-component-name"));
}
1394
/// Hosts passed as allowed hosts stay readable in scrubbed HTTP descriptions.
///
/// As the expected values show, the scheme, host and port are retained while
/// the request path is dropped. Covers a domain name, an IPv4 address and a
/// bracketed IPv6 address.
#[test]
fn scrub_allowed_host() {
    // (request URL, allow-listed host, expected scrubbed URL)
    let cases = [
        (
            "https://foo.bar.internal/api/v1/submit",
            "foo.bar.internal",
            "https://foo.bar.internal",
        ),
        (
            "http://192.168.1.1:3000",
            "192.168.1.1",
            "http://192.168.1.1:3000",
        ),
        (
            "http://[1fff:0:a88:85a3::ac1f]:8001/foo",
            "[1fff:0:a88:85a3::ac1f]",
            "http://[1fff:0:a88:85a3::ac1f]:8001",
        ),
    ];

    for (url, allowed_host, expected) in cases {
        let payload = format!(
            r#"{{
                "description": "POST {url}",
                "span_id": "bd2eb23da2beb459",
                "start_timestamp": 1597976393.4619668,
                "timestamp": 1597976393.4718769,
                "trace_id": "ff62a8b040f340bda5d830223def1d81",
                "op": "http.client"
            }}
        "#
        );
        let allowed_hosts = [allowed_host.to_string()];
        let annotated = Annotated::<Span>::from_json(&payload).unwrap();

        let (description, _parsed_sql) =
            scrub_span_description(annotated.value().unwrap(), &allowed_hosts);

        let expected_description = format!("POST {expected}");
        assert_eq!(
            description.as_deref(),
            Some(expected_description.as_str()),
            "Could not match {url}"
        );
    }
}
1441
/// Generates a `#[test]` function named `$name` that checks MongoDB
/// description scrubbing.
///
/// The generated test builds a `db` span whose `db.system` is `mongodb`, sets
/// its description to `$description_in`, runs `scrub_span_description` on it
/// and compares the scrubbed description with `$expected`.
///
/// Empty strings act as "absent" markers:
/// - an empty `$operation_in` / `$collection_in` is written as JSON `null` for
///   `db.operation` / `db.collection.name`;
/// - an empty `$expected` asserts that no scrubbed description is returned.
macro_rules! mongodb_scrubbing_test {
    ($name:ident, $description_in:expr, $operation_in:literal, $collection_in:literal, $expected:literal) => {
        #[test]
        fn $name() {
            // Encode a data attribute as a quoted JSON string, or `null` when empty.
            let quoted_or_null = |raw: &str| {
                if raw.is_empty() {
                    "null".to_string()
                } else {
                    format!("\"{}\"", raw)
                }
            };
            let operation = quoted_or_null($operation_in);
            let collection = quoted_or_null($collection_in);

            let payload = format!(
                r#"
                {{
                    "description": "",
                    "span_id": "bd2eb23da2beb459",
                    "start_timestamp": 1597976393.4619668,
                    "timestamp": 1597976393.4718769,
                    "trace_id": "ff62a8b040f340bda5d830223def1d81",
                    "op": "db",
                    "data": {{
                        "db.system": "mongodb",
                        "db.operation": {},
                        "db.collection.name": {}
                    }}
                }}
            "#,
                operation, collection
            );

            let mut annotated = Annotated::<Span>::from_json(&payload).unwrap();

            // The description is set after parsing so that the query text does
            // not have to be escaped inside the JSON payload above.
            let span = annotated.value_mut().as_mut().unwrap();
            span.description.set_value(Some($description_in.into()));

            let (description, _parsed_sql) = scrub_span_description(span, &[]);

            let expected: &str = $expected;
            if expected.is_empty() {
                assert!(description.is_none());
            } else {
                assert_eq!(expected, description.unwrap());
            }
        }
    };
}
1495
// A top-level scalar parameter (`showRecordId`) is expected to be replaced
// with `?`, while the `find` entry naming the collection is kept.
mongodb_scrubbing_test!(
    mongodb_basic_query,
    r#"{"find": "documents", "showRecordId": true}"#,
    "find",
    "documents",
    r#"{"find":"documents","showRecordId":"?"}"#
);
1503
// A nested document parameter keeps its keys; only the leaf value inside
// `filter` is expected to be replaced with `?`.
mongodb_scrubbing_test!(
    mongodb_query_with_document_param,
    r#"{"find": "documents", "filter": {"foo": "bar"}}"#,
    "find",
    "documents",
    r#"{"filter":{"foo":"?"},"find":"documents"}"#
);
1511
// The description itself has no `find` entry; the expected output still
// contains `"find":"documents"`, matching the operation and collection given
// in the span data.
mongodb_scrubbing_test!(
    mongodb_query_without_operation,
    r#"{"filter": {"foo": "bar"}}"#,
    "find",
    "documents",
    r#"{"filter":{"foo":"?"},"find":"documents"}"#
);
1519
// Empty collection argument: the macro writes `db.collection.name` as `null`,
// and the empty expectation asserts that no scrubbed description is returned.
mongodb_scrubbing_test!(
    mongodb_without_collection_in_data,
    r#"{"find": "documents", "showRecordId": true}"#,
    "find",
    "",
    ""
);
1527
// Empty operation argument: the macro writes `db.operation` as `null`, and
// the empty expectation asserts that no scrubbed description is returned.
mongodb_scrubbing_test!(
    mongodb_without_operation_in_data,
    r#"{"find": "documents", "showRecordId": true}"#,
    "",
    "documents",
    ""
);
1535
// Deep nesting: the whole sub-document under `foo` is expected to collapse
// into a single `?` instead of being scrubbed leaf by leaf.
mongodb_scrubbing_test!(
    mongodb_max_depth,
    r#"{"update": "coll", "updates": {"q": {"_id": "1"}, "u": {"$set": {"foo": {"bar": {"baz": "quux"}}}}}}"#,
    "update",
    "coll",
    r#"{"update":"coll","updates":{"q":{"_id":"?"},"u":{"$set":{"foo":"?"}}}}"#
);
1543
// A collection name ending in digits (`documents001`) is expected to have
// that numeric part replaced with the `{%s}` placeholder.
mongodb_scrubbing_test!(
    mongodb_identifier_in_collection,
    r#"{"find": "documents001", "showRecordId": true}"#,
    "find",
    "documents001",
    r#"{"find":"documents{%s}","showRecordId":"?"}"#
);
1551
// An array parameter is expected to collapse into the single-element
// placeholder `["..."]`, regardless of how many items it held.
mongodb_scrubbing_test!(
    mongodb_query_with_array,
    r#"{"insert": "documents", "documents": [{"foo": "bar"}, {"baz": "quux"}, {"qux": "quuz"}]}"#,
    "insert",
    "documents",
    r#"{"documents":["..."],"insert":"documents"}"#
);
1559
// The top-level `buffer` entry is expected to be absent from the scrubbed
// output altogether, rather than scrubbed in place.
mongodb_scrubbing_test!(
    mongodb_query_with_buffer,
    r#"{"insert": "documents", "buffer": {"0": "a", "1": "b", "2": "c"}, "documents": [{"foo": "bar"}]}"#,
    "insert",
    "documents",
    r#"{"documents":["..."],"insert":"documents"}"#
);
1567}