1mod redis;
3mod resource;
4mod sql;
5use psl;
6use relay_filter::matches_any_origin;
7use serde_json::Value;
8#[cfg(test)]
9pub use sql::{Mode, scrub_queries};
10use std::sync::LazyLock;
11
12use relay_event_schema::protocol::Span;
13use std::borrow::Cow;
14use std::net::{Ipv4Addr, Ipv6Addr};
15use std::path::Path;
16use url::{Host, Url};
17
18use crate::regexes::{
19 DB_SQL_TRANSACTION_CORE_DATA_REGEX, DB_SUPABASE_REGEX, FUNCTION_NORMALIZER_REGEX,
20 RESOURCE_NORMALIZER_REGEX,
21};
22use crate::span::TABLE_NAME_REGEX;
23use crate::span::description::redis::matching_redis_command;
24use crate::span::description::resource::COMMON_PATH_SEGMENTS;
25use crate::span::tag_extraction::HTTP_METHOD_EXTRACTOR_REGEX;
26
/// Placeholder base URL used by `scrub_resource` to parse descriptions that are not full URLs.
///
/// The placeholder prefix is removed again from the scrubbed output.
static DUMMY_BASE_URL: LazyLock<Url> = LazyLock::new(|| "http://replace_me".parse().unwrap());

/// Resource segments longer than this many bytes are replaced by `*` in `scrub_resource_segment`.
const MAX_SEGMENT_LENGTH: usize = 25;

/// File extensions longer than this many bytes are replaced by `*` in
/// `scrub_resource_file_extension`.
const MAX_EXTENSION_LENGTH: usize = 10;

/// Domain names that `scrub_domain_name` always returns unchanged.
const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"];
40
41pub(crate) fn scrub_span_description(
45 span: &Span,
46 span_allowed_hosts: &[String],
47) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
48 let Some(description) = span.description.as_str() else {
49 return (None, None);
50 };
51
52 let data = span.data.value();
53
54 let db_system = data
55 .and_then(|data| data.db_system.value())
56 .and_then(|system| system.as_str());
57 let span_origin = span.origin.as_str();
58
59 let mut parsed_sql = None;
60 let scrubbed_description = span
61 .op
62 .as_str()
63 .map(|op| op.split_once('.').unwrap_or((op, "")))
64 .and_then(|(op, sub)| match (op, sub) {
65 ("http", _) => {
66 let (method, url) = description.split_once(' ')?;
67 scrub_http(method, url, span_allowed_hosts)
68 }
69 ("cache", _) => scrub_redis_keys(description),
70 ("db", sub) => {
71 let db_operation = data
72 .and_then(|data| data.db_operation.value())
73 .and_then(|op| op.as_str());
74
75 let collection_name = data
76 .and_then(|data| data.db_collection_name.value())
77 .and_then(|collection| collection.as_str());
78
79 let (scrubbed, parsed_sql_statement) = scrub_db_query(
80 description,
81 sub,
82 db_system,
83 db_operation,
84 collection_name,
85 span_origin,
86 );
87
88 parsed_sql = parsed_sql_statement;
89
90 scrubbed
91 }
92 ("resource", ty) => scrub_resource(ty, description),
93 ("ai", sub) => match sub.split_once('.').unwrap_or((sub, "")) {
94 ("run" | "pipeline", _) => {
95 Some(description.to_owned())
98 }
99 _ => None,
100 },
101 ("ui", "load") => {
102 Some(description.to_owned())
105 }
106 ("ui", sub) if sub.starts_with("interaction.") || sub.starts_with("react.") => data
107 .and_then(|data| data.ui_component_name.value())
108 .and_then(|value| value.as_str())
109 .map(String::from),
110 ("app", _) => {
111 Some(description.to_owned())
115 }
116 ("contentprovider", "load") => {
117 Some(description.to_owned())
122 }
123 ("application", "load") => {
124 Some(description.to_owned())
129 }
130 ("activity", "load") => {
131 Some(description.to_owned())
135 }
136 ("file", _) => scrub_file(description),
137 ("function", _) => scrub_function(description),
138 _ => None,
139 });
140 (scrubbed_description, parsed_sql)
141}
142
143pub fn scrub_db_query(
147 raw_query: &str,
148 sub_op: &str,
149 db_system: Option<&str>,
150 db_operation: Option<&str>,
151 collection_name: Option<&str>,
152 span_origin: Option<&str>,
153) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
154 let mut parsed_sql = None;
155
156 let scrubbed = if db_system == Some("redis") || sub_op == "redis" {
157 scrub_redis_keys(raw_query)
158 } else if db_system == Some("mongodb") {
159 if let (Some(command), Some(collection)) = (db_operation, collection_name) {
160 scrub_mongodb_query(raw_query, command, collection)
161 } else {
162 None
163 }
164 } else if sub_op.contains("clickhouse")
165 || sub_op.contains("mongodb")
166 || sub_op.contains("redis")
167 || is_legacy_activerecord(sub_op, db_system)
168 || is_sql_mongodb(raw_query, db_system)
169 {
170 None
171 } else if span_origin == Some("auto.db.core_data") {
172 scrub_core_data(raw_query)
174 } else if sub_op.contains("prisma") {
175 Some(raw_query.to_owned())
179 } else if span_origin == Some("auto.db.supabase") && raw_query.starts_with("from(") {
180 scrub_supabase(raw_query)
184 } else {
185 let (scrubbed, mode) = sql::scrub_queries(db_system, raw_query);
186 if let sql::Mode::Parsed(ast) = mode {
187 parsed_sql = Some(ast);
188 }
189 scrubbed
190 };
191
192 (scrubbed, parsed_sql)
193}
194
/// Returns `true` if the description looks like a MongoDB query rather than SQL.
///
/// This is the case when the DB system is `mongodb`, when the description starts with `{`,
/// or when it contains one of the markers `"$`, `({` or `[{`.
fn is_sql_mongodb(description: &str, db_system: Option<&str>) -> bool {
    if db_system == Some("mongodb") || description.starts_with('{') {
        return true;
    }

    ["\"$", "({", "[{"]
        .iter()
        .any(|marker| description.contains(*marker))
}
203
/// Returns `true` for ActiveRecord spans that do not declare a DB system.
///
/// A span qualifies when `db_system` is absent and the sub-op mentions `active_record` or
/// `activerecord`.
fn is_legacy_activerecord(sub_op: &str, db_system: Option<&str>) -> bool {
    if db_system.is_some() {
        return false;
    }

    ["active_record", "activerecord"]
        .iter()
        .any(|marker| sub_op.contains(*marker))
}
208
209fn scrub_core_data(string: &str) -> Option<String> {
210 match DB_SQL_TRANSACTION_CORE_DATA_REGEX.replace_all(string, "*") {
211 Cow::Owned(scrubbed) => Some(scrubbed),
212 Cow::Borrowed(_) => None,
213 }
214}
215
216fn scrub_supabase(string: &str) -> Option<String> {
217 Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into())
218}
219
220pub fn scrub_http(method: &str, url: &str, allow_list: &[String]) -> Option<String> {
223 if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) {
224 return None;
225 };
226
227 if url.starts_with("data:image/") {
228 return Some(format!("{method} data:image/*"));
229 }
230
231 let scrubbed = match Url::parse(url) {
232 Ok(url) => {
233 let scheme = url.scheme();
234 let scrubbed_host = url.host().map(|host| scrub_host(host, allow_list));
235 let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
236
237 format!("{method} {scheme}://{domain}")
238 }
239 Err(_) => {
240 format!("{method} *")
241 }
242 };
243
244 Some(scrubbed)
245}
246
247fn scrub_file(description: &str) -> Option<String> {
248 let filename = match description.split_once(' ') {
249 Some((filename, _)) => filename,
250 _ => description,
251 };
252 match Path::new(filename).extension() {
253 Some(extension) => {
254 let ext = scrub_resource_file_extension(extension.to_str()?);
255 if ext != "*" {
256 Some(format!("*.{ext}"))
257 } else {
258 Some("*".to_owned())
259 }
260 }
261 _ => Some("*".to_owned()),
262 }
263}
264
265pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> {
282 let allow_list: Vec<_> = allow_list
283 .iter()
284 .map(|origin| origin.as_str().into())
285 .collect();
286
287 if matches_any_origin(Some(host.to_string().as_str()), &allow_list) {
288 return host.to_string().into();
289 }
290
291 match host {
292 Host::Ipv4(ip) => Cow::Borrowed(scrub_ipv4(ip)),
293 Host::Ipv6(ip) => Cow::Borrowed(scrub_ipv6(ip)),
294 Host::Domain(domain) => scrub_domain_name(domain),
295 }
296}
297
/// Scrubs an IPv4 address.
///
/// The exact loopback address `127.0.0.1` is kept; every other address becomes `*.*.*.*`.
pub fn scrub_ipv4(ip: Ipv4Addr) -> &'static str {
    // Compare against the single LOCALHOST constant, not the whole 127.0.0.0/8 range.
    if ip == Ipv4Addr::LOCALHOST {
        "127.0.0.1"
    } else {
        "*.*.*.*"
    }
}
318
/// Scrubs an IPv6 address.
///
/// The loopback address `::1` is kept; every other address becomes `*:*:*:*:*:*:*:*`.
pub fn scrub_ipv6(ip: Ipv6Addr) -> &'static str {
    if ip == Ipv6Addr::LOCALHOST {
        "::1"
    } else {
        "*:*:*:*:*:*:*:*"
    }
}
336
337pub fn scrub_domain_name(domain: &str) -> Cow<'_, str> {
352 if DOMAIN_ALLOW_LIST.contains(&domain) {
353 return Cow::Borrowed(domain);
354 }
355
356 let parsed_domain = psl::domain(domain.as_bytes());
357
358 let Some(parsed_domain) = parsed_domain else {
359 return Cow::Borrowed(domain);
361 };
362
363 let suffix = parsed_domain.suffix().as_bytes();
364 let Some(second_level_domain) = parsed_domain.as_bytes().strip_suffix(suffix) else {
365 return Cow::Borrowed(domain);
366 };
367
368 let subdomain = domain
369 .as_bytes()
370 .strip_suffix(suffix)
371 .and_then(|s| s.strip_suffix(second_level_domain));
372
373 match subdomain {
374 None | Some(b"") => Cow::Borrowed(domain),
375 Some(_subdomain) => {
376 let scrubbed = [b"*.", second_level_domain, suffix].concat();
377 match String::from_utf8(scrubbed) {
378 Ok(s) => Cow::Owned(s),
379 Err(_) => Cow::Borrowed(domain),
380 }
381 }
382 }
383}
384
/// Joins an optional host and an optional port into `host` or `host:port`.
///
/// Without a host the result is the empty string, regardless of the port. Allocation only
/// happens when both host and port are present.
pub fn concatenate_host_and_port(host: Option<&str>, port: Option<u16>) -> Cow<'_, str> {
    let Some(host) = host else {
        return Cow::Borrowed("");
    };

    match port {
        Some(port) => Cow::Owned(format!("{host}:{port}")),
        None => Cow::Borrowed(host),
    }
}
405
406fn scrub_redis_keys(string: &str) -> Option<String> {
407 let string = string.trim();
408 Some(match matching_redis_command(string) {
409 Some(command) => {
410 let mut command = command.to_uppercase();
411 match string.get(command.len()..) {
412 None | Some("") => command,
413 Some(_other) => {
414 command.push_str(" *");
415 command
416 }
417 }
418 }
419 None => "*".to_owned(),
420 })
421}
422
/// How a resource description was interpreted as a URL in `scrub_resource`.
enum UrlType {
    /// The description parsed as a complete URL on its own.
    Full,
    /// The description needed the dummy base URL and starts with `/`.
    Absolute,
    /// The description needed the dummy base URL and does not start with `/`.
    Relative,
}
431
432fn scrub_resource(resource_type: &str, string: &str) -> Option<String> {
434 let (url, ty) = match Url::parse(string) {
435 Ok(url) => (url, UrlType::Full),
436 Err(url::ParseError::RelativeUrlWithoutBase) => {
437 match Url::options().base_url(Some(&DUMMY_BASE_URL)).parse(string) {
439 Ok(url) => (
440 url,
441 if string.starts_with('/') {
442 UrlType::Absolute
443 } else {
444 UrlType::Relative
445 },
446 ),
447 Err(_) => return None,
448 }
449 }
450 Err(_) => {
451 return None;
452 }
453 };
454
455 let formatted = match url.scheme() {
456 "data" => match url.path().split_once(';') {
457 Some((ty, _data)) => format!("data:{ty}"),
458 None => "data:*/*".to_owned(),
459 },
460 "chrome-extension" | "moz-extension" | "ms-browser-extension" => {
461 return Some("browser-extension://*".to_owned());
462 }
463 scheme => {
464 let scrubbed_host = url.host().map(|host| scrub_host(host, &[]));
465 let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
466
467 let segment_count = url.path_segments().map(|s| s.count()).unwrap_or_default();
468 let mut output_segments = vec![];
469 for (i, segment) in url.path_segments().into_iter().flatten().enumerate() {
470 if i + 1 == segment_count {
471 break;
472 }
473 if COMMON_PATH_SEGMENTS.contains(segment) {
474 output_segments.push(segment);
475 } else if output_segments.last().is_none_or(|s| *s != "*") {
476 output_segments.push("*");
478 }
479 }
480
481 let segments = output_segments.join("/");
482
483 let last_segment = url
484 .path_segments()
485 .and_then(|mut s| s.next_back())
486 .unwrap_or_default();
487 let last_segment = scrub_resource_filename(resource_type, last_segment);
488
489 if segments.is_empty() {
490 format!("{scheme}://{domain}/{last_segment}")
491 } else {
492 format!("{scheme}://{domain}/{segments}/{last_segment}")
493 }
494 }
495 };
496
497 let formatted = match ty {
499 UrlType::Full => formatted,
500 UrlType::Absolute => formatted.replace("http://replace_me", ""),
501 UrlType::Relative => formatted.replace("http://replace_me/", ""),
502 };
503
504 Some(formatted)
505}
506
507fn scrub_resource_filename<'a>(ty: &str, path: &'a str) -> Cow<'a, str> {
508 if path.is_empty() {
509 return Cow::Borrowed("");
510 }
511 let (mut basename, mut extension) = path.rsplit_once('.').unwrap_or((path, ""));
512 if extension.contains('/') {
513 basename = path;
515 extension = "";
516 }
517
518 let extension = scrub_resource_file_extension(extension);
519
520 let basename = if ty == "img" {
521 Cow::Borrowed("*")
522 } else {
523 scrub_resource_segment(basename)
524 };
525
526 if extension.is_empty() {
527 basename
528 } else {
529 let mut filename = basename.to_string();
530 filename.push('.');
531 filename.push_str(extension);
532 Cow::Owned(filename)
533 }
534}
535
536fn scrub_resource_segment(segment: &str) -> Cow<'_, str> {
537 let segment = RESOURCE_NORMALIZER_REGEX.replace_all(segment, "$pre*$post");
538
539 if segment.len() > MAX_SEGMENT_LENGTH {
541 return Cow::Borrowed("*");
542 }
543
544 let mut all_alphabetic = true;
545 let mut found_uppercase = false;
546
547 for char in segment.chars() {
549 if !char.is_ascii_alphabetic() {
550 all_alphabetic = false;
551 }
552 if char.is_ascii_uppercase() {
553 found_uppercase = true;
554 }
555 if char.is_numeric() || "&%#=+@".contains(char) {
556 return Cow::Borrowed("*");
557 };
558 }
559
560 if all_alphabetic && found_uppercase {
561 return Cow::Borrowed("*");
563 }
564
565 segment
566}
567
568fn scrub_resource_file_extension(mut extension: &str) -> &str {
569 let mut digits = 0;
571 for (i, byte) in extension.bytes().enumerate() {
572 if byte.is_ascii_digit() {
573 digits += 1;
574 }
575 if digits > 1 {
576 return "*";
578 }
579 if !byte.is_ascii_alphanumeric() {
580 extension = &extension[..i];
581 break;
582 }
583 }
584
585 if extension.len() > MAX_EXTENSION_LENGTH {
586 extension = "*";
587 }
588
589 extension
590}
591
592fn scrub_function(string: &str) -> Option<String> {
593 Some(FUNCTION_NORMALIZER_REGEX.replace_all(string, "*").into())
594}
595
596fn scrub_mongodb_query(query: &str, command: &str, collection: &str) -> Option<String> {
597 let mut query: Value = serde_json::from_str(query).ok()?;
598
599 let root = query.as_object_mut()?;
600
601 root.remove("buffer");
603
604 for value in root.values_mut() {
605 scrub_mongodb_visit_node(value, 3);
606 }
607
608 let scrubbed_collection_name =
609 if let Cow::Owned(s) = TABLE_NAME_REGEX.replace_all(collection, "{%s}") {
610 s
611 } else {
612 collection.to_owned()
613 };
614 root.insert(command.to_owned(), Value::String(scrubbed_collection_name));
615
616 Some(query.to_string())
617}
618
619fn scrub_mongodb_visit_node(value: &mut Value, recursion_limit: usize) {
620 if recursion_limit == 0 {
621 match value {
622 Value::String(str) => {
623 str.clear();
624 str.push('?');
625 }
626 value => *value = Value::String("?".to_owned()),
627 }
628 return;
629 }
630
631 match value {
632 Value::Object(map) => {
633 for value in map.values_mut() {
634 scrub_mongodb_visit_node(value, recursion_limit - 1);
635 }
636 }
637 Value::Array(arr) => {
638 arr.clear();
639 arr.push(Value::String("...".to_owned()));
640 }
641 Value::String(str) => {
642 str.clear();
643 str.push('?');
644 }
645 value => *value = Value::String("?".to_owned()),
646 }
647}
648
649#[cfg(test)]
650mod tests {
651 use super::*;
652 use relay_protocol::Annotated;
653 use similar_asserts::assert_eq;
654
655 macro_rules! span_description_test {
656 ($name:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
661 #[test]
662 fn $name() {
663 let json = format!(
664 r#"
665 {{
666 "description": "",
667 "span_id": "bd2eb23da2beb459",
668 "start_timestamp": 1597976393.4619668,
669 "timestamp": 1597976393.4718769,
670 "trace_id": "ff62a8b040f340bda5d830223def1d81",
671 "op": "{}"
672 }}
673 "#,
674 $op_in
675 );
676
677 let mut span = Annotated::<Span>::from_json(&json).unwrap();
678 span.value_mut()
679 .as_mut()
680 .unwrap()
681 .description
682 .set_value(Some($description_in.into()));
683
684 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
685
686 if $expected == "" {
687 assert!(scrubbed.0.is_none());
688 } else {
689 assert_eq!($expected, scrubbed.0.unwrap());
690 }
691 }
692 };
693 }
694
695 macro_rules! span_description_test_with_lowercase {
696 ($name:ident, $name2:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
697 span_description_test!($name, $description_in, $op_in, $expected);
698 span_description_test!($name2, ($description_in).to_lowercase(), $op_in, $expected);
699 };
700 }
701
702 span_description_test!(empty, "", "http.client", "");
703
704 span_description_test!(
705 only_domain,
706 "GET http://service.io",
707 "http.client",
708 "GET http://service.io"
709 );
710
711 span_description_test!(
712 only_urllike_on_http_ops,
713 "GET https://www.service.io/resources/01234",
714 "http.client",
715 "GET https://*.service.io"
716 );
717
718 span_description_test!(
719 path_ids_end,
720 "GET https://www.service.io/resources/01234",
721 "http.client",
722 "GET https://*.service.io"
723 );
724
725 span_description_test!(
726 path_ids_middle,
727 "GET https://www.service.io/resources/01234/details",
728 "http.client",
729 "GET https://*.service.io"
730 );
731
732 span_description_test!(
733 path_multiple_ids,
734 "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
735 "http.client",
736 "GET https://*.service.io"
737 );
738
739 span_description_test!(
740 localhost,
741 "GET https://localhost/data",
742 "http.client",
743 "GET https://localhost"
744 );
745
746 span_description_test!(
747 loopback,
748 "GET https://127.0.0.1/data",
749 "http.client",
750 "GET https://127.0.0.1"
751 );
752
753 span_description_test!(
754 ip_address,
755 "GET https://8.8.8.8/data",
756 "http.client",
757 "GET https://*.*.*.*"
758 );
759
760 span_description_test!(
761 path_md5_hashes,
762 "GET /clients/563712f9722fb0996ac8f3905b40786f/project/01234",
763 "http.client",
764 "GET *"
765 );
766
767 span_description_test!(
768 path_sha_hashes,
769 "GET /clients/403926033d001b5279df37cbbe5287b7c7c267fa/project/01234",
770 "http.client",
771 "GET *"
772 );
773
774 span_description_test!(
775 hex,
776 "GET /shop/de/f43/beef/3D6/my-beef",
777 "http.client",
778 "GET *"
779 );
780
781 span_description_test!(
782 path_uuids,
783 "GET /clients/8ff81d74-606d-4c75-ac5e-cee65cbbc866/project/01234",
784 "http.client",
785 "GET *"
786 );
787
788 span_description_test!(
789 data_images,
790 "GET data:image/png;base64,drtfghaksjfdhaeh/blah/blah/blah",
791 "http.client",
792 "GET data:image/*"
793 );
794
795 span_description_test!(
796 simple_cctld,
797 "GET http://bbc.co.uk",
798 "http.client",
799 "GET http://bbc.co.uk"
800 );
801
802 span_description_test!(
803 longer_cctld,
804 "GET http://www.radio1.bbc.co.uk",
805 "http.client",
806 "GET http://*.bbc.co.uk"
807 );
808
809 span_description_test!(
810 complicated_tld,
811 "GET https://application.www.xn--85x722f.xn--55qx5d.cn",
812 "http.client",
813 "GET https://*.xn--85x722f.xn--55qx5d.cn"
814 );
815
816 span_description_test!(
817 only_dblike_on_db_ops,
818 "SELECT count() FROM table WHERE id IN (%s, %s)",
819 "http.client",
820 ""
821 );
822
823 span_description_test_with_lowercase!(
824 cache,
825 cache_lower,
826 "GET abc:12:{def}:{34}:{fg56}:EAB38:zookeeper",
827 "cache.get_item",
828 "GET *"
829 );
830
831 span_description_test_with_lowercase!(
832 redis_set,
833 redis_set_lower,
834 "SET mykey myvalue",
835 "db.redis",
836 "SET *"
837 );
838
839 span_description_test_with_lowercase!(
840 redis_set_quoted,
841 redis_set_quoted_lower,
842 r#"SET mykey 'multi: part, value'"#,
843 "db.redis",
844 "SET *"
845 );
846
847 span_description_test_with_lowercase!(
848 redis_whitespace,
849 redis_whitespace_lower,
850 " GET asdf:123",
851 "db.redis",
852 "GET *"
853 );
854
855 span_description_test_with_lowercase!(
856 redis_no_args,
857 redis_no_args_lower,
858 "EXEC",
859 "db.redis",
860 "EXEC"
861 );
862
863 span_description_test_with_lowercase!(
864 redis_invalid,
865 redis_invalid_lower,
866 "What a beautiful day!",
867 "db.redis",
868 "*"
869 );
870
871 span_description_test_with_lowercase!(
872 redis_long_command,
873 redis_long_command_lower,
874 "ACL SETUSER jane",
875 "db.redis",
876 "ACL SETUSER *"
877 );
878
879 span_description_test!(
880 nothing_cache,
881 "abc-dontscrubme-meneither:stillno:ohplsstop",
882 "cache.get_item",
883 "*"
884 );
885
886 span_description_test!(
887 resource_script,
888 "https://example.com/static/chunks/vendors-node_modules_somemodule_v1.2.3_mini-dist_index_js-client_dist-6c733292-f3cd-11ed-a05b-0242ac120003-0dc369dcf3d311eda05b0242ac120003.[hash].abcd1234.chunk.js-0242ac120003.map",
889 "resource.script",
890 "https://example.com/static/chunks/*.map"
891 );
892
893 span_description_test!(
894 resource_script_numeric_filename,
895 "https://example.com/static/chunks/09876543211234567890",
896 "resource.script",
897 "https://example.com/static/chunks/*"
898 );
899
900 span_description_test!(
901 resource_next_chunks,
902 "/_next/static/chunks/12345-abcdef0123456789.js",
903 "resource.script",
904 "/_next/static/chunks/*-*.js"
905 );
906
907 span_description_test!(
908 resource_next_media,
909 "/_next/static/media/Some_Font-Bold.0123abcd.woff2",
910 "resource.css",
911 "/_next/static/media/Some_Font-Bold.*.woff2"
912 );
913
914 span_description_test!(
915 resource_css,
916 "https://example.com/assets/dark_high_contrast-764fa7c8-f3cd-11ed-a05b-0242ac120003.css",
917 "resource.css",
918 "https://example.com/assets/dark_high_contrast-*.css"
919 );
920
921 span_description_test!(
922 integer_in_resource,
923 "https://example.com/assets/this_is-a_good_resource-123-scrub_me.js",
924 "resource.css",
925 "https://example.com/assets/*.js"
926 );
927
928 span_description_test!(
929 resource_query_params,
930 "/organization-avatar/123/?s=120",
931 "resource.img",
932 "/*/"
933 );
934
935 span_description_test!(
936 resource_query_params2,
937 "https://data.domain.com/data/guide123.gif?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
938 "resource.img",
939 "https://*.domain.com/data/*.gif"
940 );
941
942 span_description_test!(
943 resource_query_params2_script,
944 "https://data.domain.com/data/guide123.js?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
945 "resource.script",
946 "https://*.domain.com/data/guide*.js"
947 );
948
949 span_description_test!(
950 resource_no_ids,
951 "https://data.domain.com/js/guide.js",
952 "resource.script",
953 "https://*.domain.com/js/guide.js"
954 );
955
956 span_description_test!(
957 resource_no_ids_img_known_segment,
958 "https://data.domain.com/data/guide.gif",
959 "resource.img",
960 "https://*.domain.com/data/*.gif"
961 );
962
963 span_description_test!(
964 resource_no_ids_img,
965 "https://data.domain.com/something/guide.gif",
966 "resource.img",
967 "https://*.domain.com/*/*.gif"
968 );
969
970 span_description_test!(
971 resource_webpack,
972 "https://domain.com/path/to/app-1f90d5.f012d11690e188c96fe6.js",
973 "resource.js",
974 "https://domain.com/*/app-*.*.js"
975 );
976
977 span_description_test!(
978 resource_vite,
979 "webroot/assets/Profile-73f6525d.js",
980 "resource.js",
981 "*/assets/Profile-*.js"
982 );
983
984 span_description_test!(
985 resource_vite_css,
986 "webroot/assets/Shop-1aff80f7.css",
987 "resource.css",
988 "*/assets/Shop-*.css"
989 );
990
991 span_description_test!(
992 chrome_extension,
993 "chrome-extension://begnopegbbhjeeiganiajffnalhlkkjb/img/assets/icon-10k.svg",
994 "resource.other",
995 "browser-extension://*"
996 );
997
998 span_description_test!(
999 urlencoded_path_segments,
1000 "https://some.domain.com/embed/%2Fembed%2Fdashboards%2F20%3FSlug%3Dsomeone%*hide_title%3Dtrue",
1001 "resource.iframe",
1002 "https://*.domain.com/*/*"
1003 );
1004
1005 span_description_test!(
1006 random_string1,
1007 "https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
1008 "resource.img",
1009 "https://*.domain.com/*/*"
1010 );
1011
1012 span_description_test!(
1013 random_string1_script,
1014 "https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
1015 "resource.script",
1016 "https://*.domain.com/*/foo-*"
1017 );
1018
1019 span_description_test!(
1020 random_string2,
1021 "http://domain.com/fy2XSqBMqkEm_qZZH3RrzvBTKg4/qltdXIJWTF_cuwt3uKmcwWBc1DM/z1a--BVsUI_oyUjJR12pDBcOIn5.dom.jsonp",
1022 "resource.script",
1023 "http://domain.com/*/*.jsonp"
1024 );
1025
1026 span_description_test!(
1027 random_string3,
1028 "jkhdkkncnoglghljlkmcimlnlhkeamab/123.css",
1029 "resource.link",
1030 "*/*.css"
1031 );
1032
1033 span_description_test!(
1034 ui_load,
1035 "ListAppViewController",
1036 "ui.load",
1037 "ListAppViewController"
1038 );
1039
1040 span_description_test!(
1041 contentprovider_load,
1042 "io.sentry.android.core.SentryPerformanceProvider.onCreate",
1043 "contentprovider.load",
1044 "io.sentry.android.core.SentryPerformanceProvider.onCreate"
1045 );
1046
1047 span_description_test!(
1048 application_load,
1049 "io.sentry.samples.android.MyApplication.onCreate",
1050 "application.load",
1051 "io.sentry.samples.android.MyApplication.onCreate"
1052 );
1053
1054 span_description_test!(
1055 activity_load,
1056 "io.sentry.samples.android.MainActivity.onCreate",
1057 "activity.load",
1058 "io.sentry.samples.android.MainActivity.onCreate"
1059 );
1060
1061 span_description_test!(
1062 span_description_file_write_keep_extension_only,
1063 "data.data (42 KB)",
1064 "file.write",
1065 "*.data"
1066 );
1067
1068 span_description_test!(
1069 span_description_file_read_keep_extension_only,
1070 "Info.plist",
1071 "file.read",
1072 "*.plist"
1073 );
1074
1075 span_description_test!(
1076 span_description_file_with_no_extension,
1077 "somefilenamewithnoextension",
1078 "file.read",
1079 "*"
1080 );
1081
1082 span_description_test!(
1083 span_description_file_extension_with_numbers_only,
1084 "backup.2024041101",
1085 "file.read",
1086 "*"
1087 );
1088
1089 span_description_test!(
1090 resource_url_with_fragment,
1091 "https://data.domain.com/data/guide123.gif#url=someotherurl",
1092 "resource.img",
1093 "https://*.domain.com/data/*.gif"
1094 );
1095
1096 span_description_test!(
1097 resource_script_with_no_extension,
1098 "https://www.domain.com/page?id=1234567890",
1099 "resource.script",
1100 "https://*.domain.com/page"
1101 );
1102
1103 span_description_test!(
1104 resource_script_with_no_domain,
1105 "/page.js?action=name",
1106 "resource.script",
1107 "/page.js"
1108 );
1109
1110 span_description_test!(
1111 resource_script_with_no_domain_no_extension,
1112 "/page?action=name",
1113 "resource.script",
1114 "/page"
1115 );
1116
1117 span_description_test!(
1118 resource_script_with_long_extension,
1119 "/path/to/file.thisismycustomfileextension2000",
1120 "resource.script",
1121 "/*/file.*"
1122 );
1123
1124 span_description_test!(
1125 resource_script_with_long_suffix,
1126 "/path/to/file.js~ri~some-_-1,,thing-_-words%2Fhere~ri~",
1127 "resource.script",
1128 "/*/file.js"
1129 );
1130
1131 span_description_test!(
1132 resource_script_with_tilde_extension,
1133 "/path/to/file.~~",
1134 "resource.script",
1135 "/*/file"
1136 );
1137
1138 span_description_test!(
1139 resource_img_extension,
1140 "http://domain.com/something.123",
1141 "resource.img",
1142 "http://domain.com/*.*"
1143 );
1144
1145 span_description_test!(
1146 resource_img_embedded,
1147 "data:image/svg+xml;base64,PHN2ZyB4bW",
1148 "resource.img",
1149 "data:image/svg+xml"
1150 );
1151
1152 span_description_test!(
1153 db_category_with_mongodb_query,
1154 "find({some_id:1234567890},{limit:100})",
1155 "db",
1156 ""
1157 );
1158
1159 span_description_test!(db_category_with_not_sql, "{someField:someValue}", "db", "");
1160
1161 span_description_test!(
1162 resource_img_semi_colon,
1163 "http://www.foo.com/path/to/resource;param1=test;param2=ing",
1164 "resource.img",
1165 "http://*.foo.com/*/*"
1166 );
1167
1168 span_description_test!(
1169 resource_img_comma_with_extension,
1170 "https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.jpg",
1171 "resource.img",
1172 "https://example.org/*/media/*/*.jpg"
1173 );
1174
1175 span_description_test!(
1176 resource_script_comma_with_extension,
1177 "https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.js",
1178 "resource.script",
1179 "https://example.org/*/media/*/weird-stuff-*-*-*.js"
1180 );
1181
1182 span_description_test!(
1183 resource_img_path_with_comma,
1184 "/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
1185 "resource.img",
1186 "/*/*"
1187 );
1188
1189 span_description_test!(
1190 resource_script_path_with_comma,
1191 "/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
1192 "resource.script",
1193 "/*/*"
1194 );
1195
1196 span_description_test!(
1197 resource_script_random_path_only,
1198 "/ERs-sUsu3/wd4/LyMTWg/Ot1Om4m8cu3p7a/QkJWAQ/FSYL/GBlxb3kB",
1199 "resource.script",
1200 "/*/*"
1201 );
1202
1203 span_description_test!(
1204 resource_script_normalize_domain,
1205 "https://sub.sub.sub.domain.com/resource.js",
1206 "resource.script",
1207 "https://*.domain.com/resource.js"
1208 );
1209
1210 span_description_test!(
1211 resource_script_extension_in_segment,
1212 "https://domain.com/foo.bar/resource.js",
1213 "resource.script",
1214 "https://domain.com/*/resource.js"
1215 );
1216
1217 span_description_test!(
1218 resource_script_missing_scheme,
1219 "domain.com/foo.bar/resource.js",
1220 "resource.script",
1221 "*/resource.js"
1222 );
1223
1224 span_description_test!(
1225 resource_script_missing_scheme_integer_id,
1226 "domain.com/zero-length-00",
1227 "resource.script",
1228 "*/zero-length-*"
1229 );
1230
1231 span_description_test!(db_prisma, "User find", "db.sql.prisma", "User find");
1232
1233 span_description_test!(
1234 function_python,
1235 "sentry.event_manager.assign_event_to_group",
1236 "function",
1237 "sentry.event_manager.assign_event_to_group"
1238 );
1239
1240 span_description_test!(
1241 function_rust,
1242 "symbolicator_native::symbolication::symbolicate::symbolicate",
1243 "function",
1244 "symbolicator_native::symbolication::symbolicate::symbolicate"
1245 );
1246
1247 span_description_test!(
1248 function_with_hex,
1249 "symbolicator_native::symbolication::symbolicate::deadbeef",
1250 "function",
1251 "symbolicator_native::symbolication::symbolicate::*"
1252 );
1253
1254 span_description_test!(
1255 function_with_uuid,
1256 "symbolicator_native::symbolication::fb37f08422034ee985e9fc553ef27e6e::symbolicate",
1257 "function",
1258 "symbolicator_native::symbolication::*::symbolicate"
1259 );
1260
1261 #[test]
1262 fn informed_sql_parser() {
1263 let json = r#"
1264 {
1265 "description": "SELECT \"not an identifier\"",
1266 "span_id": "bd2eb23da2beb459",
1267 "start_timestamp": 1597976393.4619668,
1268 "timestamp": 1597976393.4718769,
1269 "trace_id": "ff62a8b040f340bda5d830223def1d81",
1270 "op": "db",
1271 "data": {"db.system": "mysql"}
1272 }
1273 "#;
1274
1275 let mut span = Annotated::<Span>::from_json(json).unwrap();
1276 let span = span.value_mut().as_mut().unwrap();
1277 let scrubbed = scrub_span_description(span, &[]);
1278 assert_eq!(scrubbed.0.as_deref(), Some("SELECT %s"));
1279 }
1280
1281 #[test]
1282 fn active_record() {
1283 let json = r#"{
1284 "description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
1285 "op": "db.sql.activerecord"
1286 }"#;
1287
1288 let mut span = Annotated::<Span>::from_json(json).unwrap();
1289
1290 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1291
1292 assert!(scrubbed.0.is_none());
1294 }
1295
1296 #[test]
1297 fn active_record_with_db_system() {
1298 let json = r#"{
1299 "description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
1300 "op": "db.sql.activerecord",
1301 "data": {
1302 "db.system": "mysql"
1303 }
1304 }"#;
1305
1306 let mut span = Annotated::<Span>::from_json(json).unwrap();
1307
1308 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1309
1310 assert_eq!(scrubbed.0.as_deref(), Some("SELECT a FROM b"));
1312 }
1313
1314 #[test]
1315 fn redis_with_db_system() {
1316 let json = r#"{
1317 "description": "del myveryrandomkey:123Xalsdkxfhn",
1318 "op": "db",
1319 "data": {
1320 "db.system": "redis"
1321 }
1322 }"#;
1323
1324 let mut span = Annotated::<Span>::from_json(json).unwrap();
1325
1326 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1327
1328 assert_eq!(scrubbed.0.as_deref(), Some("DEL *"));
1329 }
1330
/// A `db.sql.transaction` span with origin `auto.db.core_data` is expected
/// to have the count in its description replaced by `*`.
#[test]
fn core_data() {
    let payload = r#"{
        "description": "INSERTED 1 'UAEventData'",
        "op": "db.sql.transaction",
        "origin": "auto.db.core_data"
    }"#;

    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();

    let (description, _) = scrub_span_description(span, &[]);

    assert_eq!(description.as_deref(), Some("INSERTED * 'UAEventData'"));
}
1345
/// Same as `core_data`, but the description lists two comma-separated
/// operations; both counts are expected to be replaced by `*`.
#[test]
fn multiple_core_data() {
    let payload = r#"{
        "description": "UPDATED 1 'QueuedRequest', DELETED 1 'QueuedRequest'",
        "op": "db.sql.transaction",
        "origin": "auto.db.core_data"
    }"#;

    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();

    let (description, _) = scrub_span_description(span, &[]);

    let expected = "UPDATED * 'QueuedRequest', DELETED * 'QueuedRequest'";
    assert_eq!(description.as_deref(), Some(expected));
}
1363
/// A MongoDB query with operation and collection given in the span data is
/// expected to keep the `find`/collection pair and replace other values
/// with `?`.
#[test]
fn mongodb_scrubbing() {
    let payload = r#"{
        "description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
        "op": "db",
        "data": {
            "db.system": "mongodb",
            "db.operation": "find",
            "db.collection.name": "documents"
        }
    }"#;

    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();

    let (description, _) = scrub_span_description(span, &[]);

    let expected = r#"{"find":"documents","foo":"?"}"#;
    assert_eq!(description.as_deref(), Some(expected))
}
1385
/// Same query as `mongodb_scrubbing`, but the collection is supplied under
/// the `db.mongodb.collection` data key; the scrubbed output is expected to
/// be identical.
#[test]
fn mongodb_with_legacy_collection_property() {
    let payload = r#"{
        "description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
        "op": "db",
        "data": {
            "db.system": "mongodb",
            "db.operation": "find",
            "db.mongodb.collection": "documents"
        }
    }"#;

    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();

    let (description, _) = scrub_span_description(span, &[]);

    let expected = r#"{"find":"documents","foo":"?"}"#;
    assert_eq!(description.as_deref(), Some(expected))
}
1407
/// For a `ui.interaction.click` span with `ui.component_name` in its data,
/// the scrubbed description is expected to be that component name rather
/// than the raw selector.
#[test]
fn ui_interaction_with_component_name() {
    let payload = r#"{
        "description": "input.app-asdfasfg.asdfasdf[type=\"range\"][name=\"replay-timeline\"]",
        "op": "ui.interaction.click",
        "data": {
            "ui.component_name": "my-component-name"
        }
    }"#;

    let mut annotated = Annotated::<Span>::from_json(payload).unwrap();
    let span = annotated.value_mut().as_mut().unwrap();

    let (description, _) = scrub_span_description(span, &[]);

    assert_eq!(description.as_deref(), Some("my-component-name"));
}
1425
/// Checks `http.client` spans whose host is in the allowed-hosts list.
///
/// Each case is `(url, allowed hosts, expected scrubbed url)`. In every case
/// the expected result keeps scheme, host and port, and has no path.
#[test]
fn scrub_allowed_host() {
    let cases = [
        (
            "https://foo.bar.internal/api/v1/submit",
            ["foo.bar.internal".to_owned()],
            "https://foo.bar.internal",
        ),
        (
            "http://192.168.1.1:3000",
            ["192.168.1.1".to_owned()],
            "http://192.168.1.1:3000",
        ),
        (
            "http://[1fff:0:a88:85a3::ac1f]:8001/foo",
            ["[1fff:0:a88:85a3::ac1f]".to_owned()],
            "http://[1fff:0:a88:85a3::ac1f]:8001",
        ),
    ];

    for (url, allowed_hosts, expected) in cases {
        let payload = format!(
            r#"{{
                "description": "POST {url}",
                "span_id": "bd2eb23da2beb459",
                "start_timestamp": 1597976393.4619668,
                "timestamp": 1597976393.4718769,
                "trace_id": "ff62a8b040f340bda5d830223def1d81",
                "op": "http.client"
            }}
        "#,
        );

        let mut annotated = Annotated::<Span>::from_json(&payload).unwrap();
        let span = annotated.value_mut().as_mut().unwrap();

        let (description, _) = scrub_span_description(span, &allowed_hosts);

        // The HTTP method is expected to be kept in front of the scrubbed URL.
        let expected_description = format!("POST {expected}");
        assert_eq!(
            description.as_deref(),
            Some(expected_description.as_str()),
            "Could not match {url}"
        );
    }
}
1471
/// Generates a `#[test]` named `$name` that scrubs a MongoDB span description.
///
/// The span is parsed from JSON with `op` set to `db` and `db.system` set to
/// `mongodb`. `$operation_in` and `$collection_in` fill `db.operation` and
/// `db.collection.name`; an empty string for either is written as JSON `null`.
/// `$description_in` is assigned to the span after parsing instead of being
/// embedded in the JSON (presumably so it needs no JSON escaping).
///
/// An empty `$expected` means the scrubbed description must be `None`;
/// otherwise it must equal `$expected`.
macro_rules! mongodb_scrubbing_test {
    ($name:ident, $description_in:expr, $operation_in:literal, $collection_in:literal, $expected:literal) => {
        #[test]
        fn $name() {
            // Renders a data value as a quoted JSON string, or `null` when empty.
            let quote_or_null = |value: &str| match value {
                "" => "null".to_owned(),
                value => format!("\"{}\"", value),
            };

            let payload = format!(
                r#"
                {{
                    "description": "",
                    "span_id": "bd2eb23da2beb459",
                    "start_timestamp": 1597976393.4619668,
                    "timestamp": 1597976393.4718769,
                    "trace_id": "ff62a8b040f340bda5d830223def1d81",
                    "op": "db",
                    "data": {{
                        "db.system": "mongodb",
                        "db.operation": {},
                        "db.collection.name": {}
                    }}
                }}
            "#,
                quote_or_null($operation_in),
                quote_or_null($collection_in),
            );

            let mut annotated = Annotated::<Span>::from_json(&payload).unwrap();
            let span = annotated.value_mut().as_mut().unwrap();
            span.description.set_value(Some($description_in.into()));

            let (description, _) = scrub_span_description(span, &[]);

            match $expected {
                "" => assert!(description.is_none()),
                expected => assert_eq!(expected, description.unwrap()),
            }
        }
    };
}
1525
// Basic query: the operation/collection pair is expected to be kept and the
// `showRecordId` value replaced with `?`.
mongodb_scrubbing_test!(
    mongodb_basic_query,
    r#"{"find": "documents", "showRecordId": true}"#,
    "find",
    "documents",
    r#"{"find":"documents","showRecordId":"?"}"#
);
1533
// Nested document parameter: the expected output keeps the `filter` object
// and replaces its inner value with `?`.
mongodb_scrubbing_test!(
    mongodb_query_with_document_param,
    r#"{"find": "documents", "filter": {"foo": "bar"}}"#,
    "find",
    "documents",
    r#"{"filter":{"foo":"?"},"find":"documents"}"#
);
1541
// The description itself has no operation key; the expected output still
// contains `"find":"documents"`, matching the operation and collection
// passed via the span data.
mongodb_scrubbing_test!(
    mongodb_query_without_operation,
    r#"{"filter": {"foo": "bar"}}"#,
    "find",
    "documents",
    r#"{"filter":{"foo":"?"},"find":"documents"}"#
);
1549
// Empty collection argument (written as JSON `null` by the macro): no
// scrubbed description is expected.
mongodb_scrubbing_test!(
    mongodb_without_collection_in_data,
    r#"{"find": "documents", "showRecordId": true}"#,
    "find",
    "",
    ""
);
1557
// Empty operation argument (written as JSON `null` by the macro): no
// scrubbed description is expected.
mongodb_scrubbing_test!(
    mongodb_without_operation_in_data,
    r#"{"find": "documents", "showRecordId": true}"#,
    "",
    "documents",
    ""
);
1565
// Deeply nested input: in the expected output the object under `foo` is
// collapsed to `?` instead of being descended into further.
mongodb_scrubbing_test!(
    mongodb_max_depth,
    r#"{"update": "coll", "updates": {"q": {"_id": "1"}, "u": {"$set": {"foo": {"bar": {"baz": "quux"}}}}}}"#,
    "update",
    "coll",
    r#"{"update":"coll","updates":{"q":{"_id":"?"},"u":{"$set":{"foo":"?"}}}}"#
);
1573
// Collection name with a numeric suffix: the expected output replaces the
// `001` part of `documents001` with `{%s}`.
mongodb_scrubbing_test!(
    mongodb_identifier_in_collection,
    r#"{"find": "documents001", "showRecordId": true}"#,
    "find",
    "documents001",
    r#"{"find":"documents{%s}","showRecordId":"?"}"#
);
1581
// Array parameter: the expected output collapses the `documents` array to
// `["..."]` regardless of its contents.
mongodb_scrubbing_test!(
    mongodb_query_with_array,
    r#"{"insert": "documents", "documents": [{"foo": "bar"}, {"baz": "quux"}, {"qux": "quuz"}]}"#,
    "insert",
    "documents",
    r#"{"documents":["..."],"insert":"documents"}"#
);
1589
// Input with a `buffer` key: the expected output does not contain `buffer`
// at all, and the `documents` array is collapsed to `["..."]`.
mongodb_scrubbing_test!(
    mongodb_query_with_buffer,
    r#"{"insert": "documents", "buffer": {"0": "a", "1": "b", "2": "c"}, "documents": [{"foo": "bar"}]}"#,
    "insert",
    "documents",
    r#"{"documents":["..."],"insert":"documents"}"#
);
1597}