1mod redis;
3mod resource;
4mod sql;
5use psl;
6use relay_filter::matches_any_origin;
7use serde_json::Value;
8#[cfg(test)]
9pub use sql::{Mode, scrub_queries};
10use std::sync::LazyLock;
11
12use relay_event_schema::protocol::Span;
13use std::borrow::Cow;
14use std::net::{Ipv4Addr, Ipv6Addr};
15use std::path::Path;
16use url::{Host, Url};
17
18use crate::regexes::{
19 DB_SQL_TRANSACTION_CORE_DATA_REGEX, DB_SUPABASE_REGEX, FUNCTION_NORMALIZER_REGEX,
20 RESOURCE_NORMALIZER_REGEX,
21};
22use crate::span::TABLE_NAME_REGEX;
23use crate::span::description::redis::matching_redis_command;
24use crate::span::description::resource::COMMON_PATH_SEGMENTS;
25use crate::span::tag_extraction::HTTP_METHOD_EXTRACTOR_REGEX;
26
27static DUMMY_BASE_URL: LazyLock<Url> = LazyLock::new(|| "http://replace_me".parse().unwrap());
29
30const MAX_SEGMENT_LENGTH: usize = 25;
34
35const MAX_EXTENSION_LENGTH: usize = 10;
37
38const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"];
40
41pub(crate) fn scrub_span_description(
45 span: &Span,
46 span_allowed_hosts: &[String],
47) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
48 let Some(description) = span.description.as_str() else {
49 return (None, None);
50 };
51
52 let data = span.data.value();
53
54 let db_system = data
55 .and_then(|data| data.db_system.value())
56 .and_then(|system| system.as_str());
57 let span_origin = span.origin.as_str();
58
59 let mut parsed_sql = None;
60 let scrubbed_description = span
61 .op
62 .as_str()
63 .map(|op| op.split_once('.').unwrap_or((op, "")))
64 .and_then(|(op, sub)| match (op, sub) {
65 ("http", _) => scrub_http(description, span_allowed_hosts),
66 ("cache", _) => scrub_redis_keys(description),
67 ("db", sub) => {
68 let db_operation = data
69 .and_then(|data| data.db_operation.value())
70 .and_then(|op| op.as_str());
71
72 let collection_name = data
73 .and_then(|data| data.db_collection_name.value())
74 .and_then(|collection| collection.as_str());
75
76 let (scrubbed, parsed_sql_statement) = scrub_db_query(
77 description,
78 sub,
79 db_system,
80 db_operation,
81 collection_name,
82 span_origin,
83 );
84
85 parsed_sql = parsed_sql_statement;
86
87 scrubbed
88 }
89 ("resource", ty) => scrub_resource(ty, description),
90 ("ai", sub) => match sub.split_once('.').unwrap_or((sub, "")) {
91 ("run" | "pipeline", _) => {
92 Some(description.to_owned())
95 }
96 _ => None,
97 },
98 ("ui", "load") => {
99 Some(description.to_owned())
102 }
103 ("ui", sub) if sub.starts_with("interaction.") || sub.starts_with("react.") => data
104 .and_then(|data| data.ui_component_name.value())
105 .and_then(|value| value.as_str())
106 .map(String::from),
107 ("app", _) => {
108 Some(description.to_owned())
112 }
113 ("contentprovider", "load") => {
114 Some(description.to_owned())
119 }
120 ("application", "load") => {
121 Some(description.to_owned())
126 }
127 ("activity", "load") => {
128 Some(description.to_owned())
132 }
133 ("file", _) => scrub_file(description),
134 ("function", _) => scrub_function(description),
135 _ => None,
136 });
137 (scrubbed_description, parsed_sql)
138}
139
140pub fn scrub_db_query(
144 raw_query: &str,
145 sub_op: &str,
146 db_system: Option<&str>,
147 db_operation: Option<&str>,
148 collection_name: Option<&str>,
149 span_origin: Option<&str>,
150) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
151 let mut parsed_sql = None;
152
153 let scrubbed = if db_system == Some("redis") || sub_op == "redis" {
154 scrub_redis_keys(raw_query)
155 } else if db_system == Some("mongodb") {
156 if let (Some(command), Some(collection)) = (db_operation, collection_name) {
157 scrub_mongodb_query(raw_query, command, collection)
158 } else {
159 None
160 }
161 } else if sub_op.contains("clickhouse")
162 || sub_op.contains("mongodb")
163 || sub_op.contains("redis")
164 || is_legacy_activerecord(sub_op, db_system)
165 || is_sql_mongodb(raw_query, db_system)
166 {
167 None
168 } else if span_origin == Some("auto.db.core_data") {
169 scrub_core_data(raw_query)
171 } else if sub_op.contains("prisma") {
172 Some(raw_query.to_owned())
176 } else if span_origin == Some("auto.db.supabase") && raw_query.starts_with("from(") {
177 scrub_supabase(raw_query)
181 } else {
182 let (scrubbed, mode) = sql::scrub_queries(db_system, raw_query);
183 if let sql::Mode::Parsed(ast) = mode {
184 parsed_sql = Some(ast);
185 }
186 scrubbed
187 };
188
189 (scrubbed, parsed_sql)
190}
191
192fn is_sql_mongodb(description: &str, db_system: Option<&str>) -> bool {
194 description.contains("\"$")
195 || description.contains("({")
196 || description.contains("[{")
197 || description.starts_with('{')
198 || db_system == Some("mongodb")
199}
200
201fn is_legacy_activerecord(sub_op: &str, db_system: Option<&str>) -> bool {
203 db_system.is_none() && (sub_op.contains("active_record") || sub_op.contains("activerecord"))
204}
205
206fn scrub_core_data(string: &str) -> Option<String> {
207 match DB_SQL_TRANSACTION_CORE_DATA_REGEX.replace_all(string, "*") {
208 Cow::Owned(scrubbed) => Some(scrubbed),
209 Cow::Borrowed(_) => None,
210 }
211}
212
213fn scrub_supabase(string: &str) -> Option<String> {
214 Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into())
215}
216
217fn scrub_http(string: &str, allow_list: &[String]) -> Option<String> {
218 let (method, url) = string.split_once(' ')?;
219 if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) {
220 return None;
221 };
222
223 if url.starts_with("data:image/") {
224 return Some(format!("{method} data:image/*"));
225 }
226
227 let scrubbed = match Url::parse(url) {
228 Ok(url) => {
229 let scheme = url.scheme();
230 let scrubbed_host = url.host().map(|host| scrub_host(host, allow_list));
231 let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
232
233 format!("{method} {scheme}://{domain}")
234 }
235 Err(_) => {
236 format!("{method} *")
237 }
238 };
239
240 Some(scrubbed)
241}
242
243fn scrub_file(description: &str) -> Option<String> {
244 let filename = match description.split_once(' ') {
245 Some((filename, _)) => filename,
246 _ => description,
247 };
248 match Path::new(filename).extension() {
249 Some(extension) => {
250 let ext = scrub_resource_file_extension(extension.to_str()?);
251 if ext != "*" {
252 Some(format!("*.{ext}"))
253 } else {
254 Some("*".to_owned())
255 }
256 }
257 _ => Some("*".to_owned()),
258 }
259}
260
261pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> {
278 let allow_list: Vec<_> = allow_list
279 .iter()
280 .map(|origin| origin.as_str().into())
281 .collect();
282
283 if matches_any_origin(Some(host.to_string().as_str()), &allow_list) {
284 return host.to_string().into();
285 }
286
287 match host {
288 Host::Ipv4(ip) => Cow::Borrowed(scrub_ipv4(ip)),
289 Host::Ipv6(ip) => Cow::Borrowed(scrub_ipv6(ip)),
290 Host::Domain(domain) => scrub_domain_name(domain),
291 }
292}
293
294pub fn scrub_ipv4(ip: Ipv4Addr) -> &'static str {
309 match ip {
310 Ipv4Addr::LOCALHOST => "127.0.0.1",
311 _ => "*.*.*.*",
312 }
313}
314
315pub fn scrub_ipv6(ip: Ipv6Addr) -> &'static str {
327 match ip {
328 Ipv6Addr::LOCALHOST => "::1",
329 _ => "*:*:*:*:*:*:*:*",
330 }
331}
332
333pub fn scrub_domain_name(domain: &str) -> Cow<'_, str> {
348 if DOMAIN_ALLOW_LIST.contains(&domain) {
349 return Cow::Borrowed(domain);
350 }
351
352 let parsed_domain = psl::domain(domain.as_bytes());
353
354 let Some(parsed_domain) = parsed_domain else {
355 return Cow::Borrowed(domain);
357 };
358
359 let suffix = parsed_domain.suffix().as_bytes();
360 let Some(second_level_domain) = parsed_domain.as_bytes().strip_suffix(suffix) else {
361 return Cow::Borrowed(domain);
362 };
363
364 let subdomain = domain
365 .as_bytes()
366 .strip_suffix(suffix)
367 .and_then(|s| s.strip_suffix(second_level_domain));
368
369 match subdomain {
370 None | Some(b"") => Cow::Borrowed(domain),
371 Some(_subdomain) => {
372 let scrubbed = [b"*.", second_level_domain, suffix].concat();
373 match String::from_utf8(scrubbed) {
374 Ok(s) => Cow::Owned(s),
375 Err(_) => Cow::Borrowed(domain),
376 }
377 }
378 }
379}
380
381pub fn concatenate_host_and_port(host: Option<&str>, port: Option<u16>) -> Cow<'_, str> {
395 match (host, port) {
396 (None, _) => Cow::Borrowed(""),
397 (Some(host), None) => Cow::Borrowed(host),
398 (Some(host), Some(port)) => Cow::Owned(format!("{host}:{port}")),
399 }
400}
401
402fn scrub_redis_keys(string: &str) -> Option<String> {
403 let string = string.trim();
404 Some(match matching_redis_command(string) {
405 Some(command) => {
406 let mut command = command.to_uppercase();
407 match string.get(command.len()..) {
408 None | Some("") => command,
409 Some(_other) => {
410 command.push_str(" *");
411 command
412 }
413 }
414 }
415 None => "*".to_owned(),
416 })
417}
418
419enum UrlType {
420 Full,
422 Absolute,
424 Relative,
426}
427
428fn scrub_resource(resource_type: &str, string: &str) -> Option<String> {
430 let (url, ty) = match Url::parse(string) {
431 Ok(url) => (url, UrlType::Full),
432 Err(url::ParseError::RelativeUrlWithoutBase) => {
433 match Url::options().base_url(Some(&DUMMY_BASE_URL)).parse(string) {
435 Ok(url) => (
436 url,
437 if string.starts_with('/') {
438 UrlType::Absolute
439 } else {
440 UrlType::Relative
441 },
442 ),
443 Err(_) => return None,
444 }
445 }
446 Err(_) => {
447 return None;
448 }
449 };
450
451 let formatted = match url.scheme() {
452 "data" => match url.path().split_once(';') {
453 Some((ty, _data)) => format!("data:{ty}"),
454 None => "data:*/*".to_owned(),
455 },
456 "chrome-extension" | "moz-extension" | "ms-browser-extension" => {
457 return Some("browser-extension://*".to_owned());
458 }
459 scheme => {
460 let scrubbed_host = url.host().map(|host| scrub_host(host, &[]));
461 let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port());
462
463 let segment_count = url.path_segments().map(|s| s.count()).unwrap_or_default();
464 let mut output_segments = vec![];
465 for (i, segment) in url.path_segments().into_iter().flatten().enumerate() {
466 if i + 1 == segment_count {
467 break;
468 }
469 if COMMON_PATH_SEGMENTS.contains(segment) {
470 output_segments.push(segment);
471 } else if output_segments.last().is_none_or(|s| *s != "*") {
472 output_segments.push("*");
474 }
475 }
476
477 let segments = output_segments.join("/");
478
479 let last_segment = url
480 .path_segments()
481 .and_then(|mut s| s.next_back())
482 .unwrap_or_default();
483 let last_segment = scrub_resource_filename(resource_type, last_segment);
484
485 if segments.is_empty() {
486 format!("{scheme}://{domain}/{last_segment}")
487 } else {
488 format!("{scheme}://{domain}/{segments}/{last_segment}")
489 }
490 }
491 };
492
493 let formatted = match ty {
495 UrlType::Full => formatted,
496 UrlType::Absolute => formatted.replace("http://replace_me", ""),
497 UrlType::Relative => formatted.replace("http://replace_me/", ""),
498 };
499
500 Some(formatted)
501}
502
503fn scrub_resource_filename<'a>(ty: &str, path: &'a str) -> Cow<'a, str> {
504 if path.is_empty() {
505 return Cow::Borrowed("");
506 }
507 let (mut basename, mut extension) = path.rsplit_once('.').unwrap_or((path, ""));
508 if extension.contains('/') {
509 basename = path;
511 extension = "";
512 }
513
514 let extension = scrub_resource_file_extension(extension);
515
516 let basename = if ty == "img" {
517 Cow::Borrowed("*")
518 } else {
519 scrub_resource_segment(basename)
520 };
521
522 if extension.is_empty() {
523 basename
524 } else {
525 let mut filename = basename.to_string();
526 filename.push('.');
527 filename.push_str(extension);
528 Cow::Owned(filename)
529 }
530}
531
532fn scrub_resource_segment(segment: &str) -> Cow<'_, str> {
533 let segment = RESOURCE_NORMALIZER_REGEX.replace_all(segment, "$pre*$post");
534
535 if segment.len() > MAX_SEGMENT_LENGTH {
537 return Cow::Borrowed("*");
538 }
539
540 let mut all_alphabetic = true;
541 let mut found_uppercase = false;
542
543 for char in segment.chars() {
545 if !char.is_ascii_alphabetic() {
546 all_alphabetic = false;
547 }
548 if char.is_ascii_uppercase() {
549 found_uppercase = true;
550 }
551 if char.is_numeric() || "&%#=+@".contains(char) {
552 return Cow::Borrowed("*");
553 };
554 }
555
556 if all_alphabetic && found_uppercase {
557 return Cow::Borrowed("*");
559 }
560
561 segment
562}
563
564fn scrub_resource_file_extension(mut extension: &str) -> &str {
565 let mut digits = 0;
567 for (i, byte) in extension.bytes().enumerate() {
568 if byte.is_ascii_digit() {
569 digits += 1;
570 }
571 if digits > 1 {
572 return "*";
574 }
575 if !byte.is_ascii_alphanumeric() {
576 extension = &extension[..i];
577 break;
578 }
579 }
580
581 if extension.len() > MAX_EXTENSION_LENGTH {
582 extension = "*";
583 }
584
585 extension
586}
587
588fn scrub_function(string: &str) -> Option<String> {
589 Some(FUNCTION_NORMALIZER_REGEX.replace_all(string, "*").into())
590}
591
592fn scrub_mongodb_query(query: &str, command: &str, collection: &str) -> Option<String> {
593 let mut query: Value = serde_json::from_str(query).ok()?;
594
595 let root = query.as_object_mut()?;
596
597 root.remove("buffer");
599
600 for value in root.values_mut() {
601 scrub_mongodb_visit_node(value, 3);
602 }
603
604 let scrubbed_collection_name =
605 if let Cow::Owned(s) = TABLE_NAME_REGEX.replace_all(collection, "{%s}") {
606 s
607 } else {
608 collection.to_owned()
609 };
610 root.insert(command.to_owned(), Value::String(scrubbed_collection_name));
611
612 Some(query.to_string())
613}
614
615fn scrub_mongodb_visit_node(value: &mut Value, recursion_limit: usize) {
616 if recursion_limit == 0 {
617 match value {
618 Value::String(str) => {
619 str.clear();
620 str.push('?');
621 }
622 value => *value = Value::String("?".to_owned()),
623 }
624 return;
625 }
626
627 match value {
628 Value::Object(map) => {
629 for value in map.values_mut() {
630 scrub_mongodb_visit_node(value, recursion_limit - 1);
631 }
632 }
633 Value::Array(arr) => {
634 arr.clear();
635 arr.push(Value::String("...".to_owned()));
636 }
637 Value::String(str) => {
638 str.clear();
639 str.push('?');
640 }
641 value => *value = Value::String("?".to_owned()),
642 }
643}
644
645#[cfg(test)]
646mod tests {
647 use super::*;
648 use relay_protocol::Annotated;
649 use similar_asserts::assert_eq;
650
651 macro_rules! span_description_test {
652 ($name:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
657 #[test]
658 fn $name() {
659 let json = format!(
660 r#"
661 {{
662 "description": "",
663 "span_id": "bd2eb23da2beb459",
664 "start_timestamp": 1597976393.4619668,
665 "timestamp": 1597976393.4718769,
666 "trace_id": "ff62a8b040f340bda5d830223def1d81",
667 "op": "{}"
668 }}
669 "#,
670 $op_in
671 );
672
673 let mut span = Annotated::<Span>::from_json(&json).unwrap();
674 span.value_mut()
675 .as_mut()
676 .unwrap()
677 .description
678 .set_value(Some($description_in.into()));
679
680 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
681
682 if $expected == "" {
683 assert!(scrubbed.0.is_none());
684 } else {
685 assert_eq!($expected, scrubbed.0.unwrap());
686 }
687 }
688 };
689 }
690
691 macro_rules! span_description_test_with_lowercase {
692 ($name:ident, $name2:ident, $description_in:expr, $op_in:literal, $expected:literal) => {
693 span_description_test!($name, $description_in, $op_in, $expected);
694 span_description_test!($name2, ($description_in).to_lowercase(), $op_in, $expected);
695 };
696 }
697
698 span_description_test!(empty, "", "http.client", "");
699
700 span_description_test!(
701 only_domain,
702 "GET http://service.io",
703 "http.client",
704 "GET http://service.io"
705 );
706
707 span_description_test!(
708 only_urllike_on_http_ops,
709 "GET https://www.service.io/resources/01234",
710 "http.client",
711 "GET https://*.service.io"
712 );
713
714 span_description_test!(
715 path_ids_end,
716 "GET https://www.service.io/resources/01234",
717 "http.client",
718 "GET https://*.service.io"
719 );
720
721 span_description_test!(
722 path_ids_middle,
723 "GET https://www.service.io/resources/01234/details",
724 "http.client",
725 "GET https://*.service.io"
726 );
727
728 span_description_test!(
729 path_multiple_ids,
730 "GET https://www.service.io/users/01234-qwerty/settings/98765-adfghj",
731 "http.client",
732 "GET https://*.service.io"
733 );
734
735 span_description_test!(
736 localhost,
737 "GET https://localhost/data",
738 "http.client",
739 "GET https://localhost"
740 );
741
742 span_description_test!(
743 loopback,
744 "GET https://127.0.0.1/data",
745 "http.client",
746 "GET https://127.0.0.1"
747 );
748
749 span_description_test!(
750 ip_address,
751 "GET https://8.8.8.8/data",
752 "http.client",
753 "GET https://*.*.*.*"
754 );
755
756 span_description_test!(
757 path_md5_hashes,
758 "GET /clients/563712f9722fb0996ac8f3905b40786f/project/01234",
759 "http.client",
760 "GET *"
761 );
762
763 span_description_test!(
764 path_sha_hashes,
765 "GET /clients/403926033d001b5279df37cbbe5287b7c7c267fa/project/01234",
766 "http.client",
767 "GET *"
768 );
769
770 span_description_test!(
771 hex,
772 "GET /shop/de/f43/beef/3D6/my-beef",
773 "http.client",
774 "GET *"
775 );
776
777 span_description_test!(
778 path_uuids,
779 "GET /clients/8ff81d74-606d-4c75-ac5e-cee65cbbc866/project/01234",
780 "http.client",
781 "GET *"
782 );
783
784 span_description_test!(
785 data_images,
786 "GET ",
787 "http.client",
788 "GET data:image/*"
789 );
790
791 span_description_test!(
792 simple_cctld,
793 "GET http://bbc.co.uk",
794 "http.client",
795 "GET http://bbc.co.uk"
796 );
797
798 span_description_test!(
799 longer_cctld,
800 "GET http://www.radio1.bbc.co.uk",
801 "http.client",
802 "GET http://*.bbc.co.uk"
803 );
804
805 span_description_test!(
806 complicated_tld,
807 "GET https://application.www.xn--85x722f.xn--55qx5d.cn",
808 "http.client",
809 "GET https://*.xn--85x722f.xn--55qx5d.cn"
810 );
811
812 span_description_test!(
813 only_dblike_on_db_ops,
814 "SELECT count() FROM table WHERE id IN (%s, %s)",
815 "http.client",
816 ""
817 );
818
819 span_description_test_with_lowercase!(
820 cache,
821 cache_lower,
822 "GET abc:12:{def}:{34}:{fg56}:EAB38:zookeeper",
823 "cache.get_item",
824 "GET *"
825 );
826
827 span_description_test_with_lowercase!(
828 redis_set,
829 redis_set_lower,
830 "SET mykey myvalue",
831 "db.redis",
832 "SET *"
833 );
834
835 span_description_test_with_lowercase!(
836 redis_set_quoted,
837 redis_set_quoted_lower,
838 r#"SET mykey 'multi: part, value'"#,
839 "db.redis",
840 "SET *"
841 );
842
843 span_description_test_with_lowercase!(
844 redis_whitespace,
845 redis_whitespace_lower,
846 " GET asdf:123",
847 "db.redis",
848 "GET *"
849 );
850
851 span_description_test_with_lowercase!(
852 redis_no_args,
853 redis_no_args_lower,
854 "EXEC",
855 "db.redis",
856 "EXEC"
857 );
858
859 span_description_test_with_lowercase!(
860 redis_invalid,
861 redis_invalid_lower,
862 "What a beautiful day!",
863 "db.redis",
864 "*"
865 );
866
867 span_description_test_with_lowercase!(
868 redis_long_command,
869 redis_long_command_lower,
870 "ACL SETUSER jane",
871 "db.redis",
872 "ACL SETUSER *"
873 );
874
875 span_description_test!(
876 nothing_cache,
877 "abc-dontscrubme-meneither:stillno:ohplsstop",
878 "cache.get_item",
879 "*"
880 );
881
882 span_description_test!(
883 resource_script,
884 "https://example.com/static/chunks/vendors-node_modules_somemodule_v1.2.3_mini-dist_index_js-client_dist-6c733292-f3cd-11ed-a05b-0242ac120003-0dc369dcf3d311eda05b0242ac120003.[hash].abcd1234.chunk.js-0242ac120003.map",
885 "resource.script",
886 "https://example.com/static/chunks/*.map"
887 );
888
889 span_description_test!(
890 resource_script_numeric_filename,
891 "https://example.com/static/chunks/09876543211234567890",
892 "resource.script",
893 "https://example.com/static/chunks/*"
894 );
895
896 span_description_test!(
897 resource_next_chunks,
898 "/_next/static/chunks/12345-abcdef0123456789.js",
899 "resource.script",
900 "/_next/static/chunks/*-*.js"
901 );
902
903 span_description_test!(
904 resource_next_media,
905 "/_next/static/media/Some_Font-Bold.0123abcd.woff2",
906 "resource.css",
907 "/_next/static/media/Some_Font-Bold.*.woff2"
908 );
909
910 span_description_test!(
911 resource_css,
912 "https://example.com/assets/dark_high_contrast-764fa7c8-f3cd-11ed-a05b-0242ac120003.css",
913 "resource.css",
914 "https://example.com/assets/dark_high_contrast-*.css"
915 );
916
917 span_description_test!(
918 integer_in_resource,
919 "https://example.com/assets/this_is-a_good_resource-123-scrub_me.js",
920 "resource.css",
921 "https://example.com/assets/*.js"
922 );
923
924 span_description_test!(
925 resource_query_params,
926 "/organization-avatar/123/?s=120",
927 "resource.img",
928 "/*/"
929 );
930
931 span_description_test!(
932 resource_query_params2,
933 "https://data.domain.com/data/guide123.gif?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
934 "resource.img",
935 "https://*.domain.com/data/*.gif"
936 );
937
938 span_description_test!(
939 resource_query_params2_script,
940 "https://data.domain.com/data/guide123.js?jzb=3f535634H467g5-2f256f&ct=1234567890&v=1.203.0_prod",
941 "resource.script",
942 "https://*.domain.com/data/guide*.js"
943 );
944
945 span_description_test!(
946 resource_no_ids,
947 "https://data.domain.com/js/guide.js",
948 "resource.script",
949 "https://*.domain.com/js/guide.js"
950 );
951
952 span_description_test!(
953 resource_no_ids_img_known_segment,
954 "https://data.domain.com/data/guide.gif",
955 "resource.img",
956 "https://*.domain.com/data/*.gif"
957 );
958
959 span_description_test!(
960 resource_no_ids_img,
961 "https://data.domain.com/something/guide.gif",
962 "resource.img",
963 "https://*.domain.com/*/*.gif"
964 );
965
966 span_description_test!(
967 resource_webpack,
968 "https://domain.com/path/to/app-1f90d5.f012d11690e188c96fe6.js",
969 "resource.js",
970 "https://domain.com/*/app-*.*.js"
971 );
972
973 span_description_test!(
974 resource_vite,
975 "webroot/assets/Profile-73f6525d.js",
976 "resource.js",
977 "*/assets/Profile-*.js"
978 );
979
980 span_description_test!(
981 resource_vite_css,
982 "webroot/assets/Shop-1aff80f7.css",
983 "resource.css",
984 "*/assets/Shop-*.css"
985 );
986
987 span_description_test!(
988 chrome_extension,
989 "chrome-extension://begnopegbbhjeeiganiajffnalhlkkjb/img/assets/icon-10k.svg",
990 "resource.other",
991 "browser-extension://*"
992 );
993
994 span_description_test!(
995 urlencoded_path_segments,
996 "https://some.domain.com/embed/%2Fembed%2Fdashboards%2F20%3FSlug%3Dsomeone%*hide_title%3Dtrue",
997 "resource.iframe",
998 "https://*.domain.com/*/*"
999 );
1000
1001 span_description_test!(
1002 random_string1,
1003 "https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
1004 "resource.img",
1005 "https://*.domain.com/*/*"
1006 );
1007
1008 span_description_test!(
1009 random_string1_script,
1010 "https://static.domain.com/6gezWf_qs4Wc12Nz9rpLOx2aw2k/foo-99",
1011 "resource.script",
1012 "https://*.domain.com/*/foo-*"
1013 );
1014
1015 span_description_test!(
1016 random_string2,
1017 "http://domain.com/fy2XSqBMqkEm_qZZH3RrzvBTKg4/qltdXIJWTF_cuwt3uKmcwWBc1DM/z1a--BVsUI_oyUjJR12pDBcOIn5.dom.jsonp",
1018 "resource.script",
1019 "http://domain.com/*/*.jsonp"
1020 );
1021
1022 span_description_test!(
1023 random_string3,
1024 "jkhdkkncnoglghljlkmcimlnlhkeamab/123.css",
1025 "resource.link",
1026 "*/*.css"
1027 );
1028
1029 span_description_test!(
1030 ui_load,
1031 "ListAppViewController",
1032 "ui.load",
1033 "ListAppViewController"
1034 );
1035
1036 span_description_test!(
1037 contentprovider_load,
1038 "io.sentry.android.core.SentryPerformanceProvider.onCreate",
1039 "contentprovider.load",
1040 "io.sentry.android.core.SentryPerformanceProvider.onCreate"
1041 );
1042
1043 span_description_test!(
1044 application_load,
1045 "io.sentry.samples.android.MyApplication.onCreate",
1046 "application.load",
1047 "io.sentry.samples.android.MyApplication.onCreate"
1048 );
1049
1050 span_description_test!(
1051 activity_load,
1052 "io.sentry.samples.android.MainActivity.onCreate",
1053 "activity.load",
1054 "io.sentry.samples.android.MainActivity.onCreate"
1055 );
1056
1057 span_description_test!(
1058 span_description_file_write_keep_extension_only,
1059 "data.data (42 KB)",
1060 "file.write",
1061 "*.data"
1062 );
1063
1064 span_description_test!(
1065 span_description_file_read_keep_extension_only,
1066 "Info.plist",
1067 "file.read",
1068 "*.plist"
1069 );
1070
1071 span_description_test!(
1072 span_description_file_with_no_extension,
1073 "somefilenamewithnoextension",
1074 "file.read",
1075 "*"
1076 );
1077
1078 span_description_test!(
1079 span_description_file_extension_with_numbers_only,
1080 "backup.2024041101",
1081 "file.read",
1082 "*"
1083 );
1084
1085 span_description_test!(
1086 resource_url_with_fragment,
1087 "https://data.domain.com/data/guide123.gif#url=someotherurl",
1088 "resource.img",
1089 "https://*.domain.com/data/*.gif"
1090 );
1091
1092 span_description_test!(
1093 resource_script_with_no_extension,
1094 "https://www.domain.com/page?id=1234567890",
1095 "resource.script",
1096 "https://*.domain.com/page"
1097 );
1098
1099 span_description_test!(
1100 resource_script_with_no_domain,
1101 "/page.js?action=name",
1102 "resource.script",
1103 "/page.js"
1104 );
1105
1106 span_description_test!(
1107 resource_script_with_no_domain_no_extension,
1108 "/page?action=name",
1109 "resource.script",
1110 "/page"
1111 );
1112
1113 span_description_test!(
1114 resource_script_with_long_extension,
1115 "/path/to/file.thisismycustomfileextension2000",
1116 "resource.script",
1117 "/*/file.*"
1118 );
1119
1120 span_description_test!(
1121 resource_script_with_long_suffix,
1122 "/path/to/file.js~ri~some-_-1,,thing-_-words%2Fhere~ri~",
1123 "resource.script",
1124 "/*/file.js"
1125 );
1126
1127 span_description_test!(
1128 resource_script_with_tilde_extension,
1129 "/path/to/file.~~",
1130 "resource.script",
1131 "/*/file"
1132 );
1133
1134 span_description_test!(
1135 resource_img_extension,
1136 "http://domain.com/something.123",
1137 "resource.img",
1138 "http://domain.com/*.*"
1139 );
1140
1141 span_description_test!(
1142 resource_img_embedded,
1143 "",
1144 "resource.img",
1145 "data:image/svg+xml"
1146 );
1147
1148 span_description_test!(
1149 db_category_with_mongodb_query,
1150 "find({some_id:1234567890},{limit:100})",
1151 "db",
1152 ""
1153 );
1154
1155 span_description_test!(db_category_with_not_sql, "{someField:someValue}", "db", "");
1156
1157 span_description_test!(
1158 resource_img_semi_colon,
1159 "http://www.foo.com/path/to/resource;param1=test;param2=ing",
1160 "resource.img",
1161 "http://*.foo.com/*/*"
1162 );
1163
1164 span_description_test!(
1165 resource_img_comma_with_extension,
1166 "https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.jpg",
1167 "resource.img",
1168 "https://example.org/*/media/*/*.jpg"
1169 );
1170
1171 span_description_test!(
1172 resource_script_comma_with_extension,
1173 "https://example.org/p/fit=cover,width=150,height=150,format=auto,quality=90/media/photosV2/weird-stuff-123-234-456.js",
1174 "resource.script",
1175 "https://example.org/*/media/*/weird-stuff-*-*-*.js"
1176 );
1177
1178 span_description_test!(
1179 resource_img_path_with_comma,
1180 "/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
1181 "resource.img",
1182 "/*/*"
1183 );
1184
1185 span_description_test!(
1186 resource_script_path_with_comma,
1187 "/help/purchase-details/1,*,0&fmt=webp&qlt=*,1&fit=constrain,0&op_sharpen=0&resMode=sharp2&iccEmbed=0&printRes=*",
1188 "resource.script",
1189 "/*/*"
1190 );
1191
1192 span_description_test!(
1193 resource_script_random_path_only,
1194 "/ERs-sUsu3/wd4/LyMTWg/Ot1Om4m8cu3p7a/QkJWAQ/FSYL/GBlxb3kB",
1195 "resource.script",
1196 "/*/*"
1197 );
1198
1199 span_description_test!(
1200 resource_script_normalize_domain,
1201 "https://sub.sub.sub.domain.com/resource.js",
1202 "resource.script",
1203 "https://*.domain.com/resource.js"
1204 );
1205
1206 span_description_test!(
1207 resource_script_extension_in_segment,
1208 "https://domain.com/foo.bar/resource.js",
1209 "resource.script",
1210 "https://domain.com/*/resource.js"
1211 );
1212
1213 span_description_test!(
1214 resource_script_missing_scheme,
1215 "domain.com/foo.bar/resource.js",
1216 "resource.script",
1217 "*/resource.js"
1218 );
1219
1220 span_description_test!(
1221 resource_script_missing_scheme_integer_id,
1222 "domain.com/zero-length-00",
1223 "resource.script",
1224 "*/zero-length-*"
1225 );
1226
1227 span_description_test!(db_prisma, "User find", "db.sql.prisma", "User find");
1228
1229 span_description_test!(
1230 function_python,
1231 "sentry.event_manager.assign_event_to_group",
1232 "function",
1233 "sentry.event_manager.assign_event_to_group"
1234 );
1235
1236 span_description_test!(
1237 function_rust,
1238 "symbolicator_native::symbolication::symbolicate::symbolicate",
1239 "function",
1240 "symbolicator_native::symbolication::symbolicate::symbolicate"
1241 );
1242
1243 span_description_test!(
1244 function_with_hex,
1245 "symbolicator_native::symbolication::symbolicate::deadbeef",
1246 "function",
1247 "symbolicator_native::symbolication::symbolicate::*"
1248 );
1249
1250 span_description_test!(
1251 function_with_uuid,
1252 "symbolicator_native::symbolication::fb37f08422034ee985e9fc553ef27e6e::symbolicate",
1253 "function",
1254 "symbolicator_native::symbolication::*::symbolicate"
1255 );
1256
1257 #[test]
1258 fn informed_sql_parser() {
1259 let json = r#"
1260 {
1261 "description": "SELECT \"not an identifier\"",
1262 "span_id": "bd2eb23da2beb459",
1263 "start_timestamp": 1597976393.4619668,
1264 "timestamp": 1597976393.4718769,
1265 "trace_id": "ff62a8b040f340bda5d830223def1d81",
1266 "op": "db",
1267 "data": {"db.system": "mysql"}
1268 }
1269 "#;
1270
1271 let mut span = Annotated::<Span>::from_json(json).unwrap();
1272 let span = span.value_mut().as_mut().unwrap();
1273 let scrubbed = scrub_span_description(span, &[]);
1274 assert_eq!(scrubbed.0.as_deref(), Some("SELECT %s"));
1275 }
1276
1277 #[test]
1278 fn active_record() {
1279 let json = r#"{
1280 "description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
1281 "op": "db.sql.activerecord"
1282 }"#;
1283
1284 let mut span = Annotated::<Span>::from_json(json).unwrap();
1285
1286 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1287
1288 assert!(scrubbed.0.is_none());
1290 }
1291
1292 #[test]
1293 fn active_record_with_db_system() {
1294 let json = r#"{
1295 "description": "/*some comment `my_function'*/ SELECT `a` FROM `b`",
1296 "op": "db.sql.activerecord",
1297 "data": {
1298 "db.system": "mysql"
1299 }
1300 }"#;
1301
1302 let mut span = Annotated::<Span>::from_json(json).unwrap();
1303
1304 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1305
1306 assert_eq!(scrubbed.0.as_deref(), Some("SELECT a FROM b"));
1308 }
1309
1310 #[test]
1311 fn redis_with_db_system() {
1312 let json = r#"{
1313 "description": "del myveryrandomkey:123Xalsdkxfhn",
1314 "op": "db",
1315 "data": {
1316 "db.system": "redis"
1317 }
1318 }"#;
1319
1320 let mut span = Annotated::<Span>::from_json(json).unwrap();
1321
1322 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1323
1324 assert_eq!(scrubbed.0.as_deref(), Some("DEL *"));
1325 }
1326
1327 #[test]
1328 fn core_data() {
1329 let json = r#"{
1330 "description": "INSERTED 1 'UAEventData'",
1331 "op": "db.sql.transaction",
1332 "origin": "auto.db.core_data"
1333 }"#;
1334
1335 let mut span = Annotated::<Span>::from_json(json).unwrap();
1336
1337 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1338
1339 assert_eq!(scrubbed.0.as_deref(), Some("INSERTED * 'UAEventData'"));
1340 }
1341
1342 #[test]
1343 fn multiple_core_data() {
1344 let json = r#"{
1345 "description": "UPDATED 1 'QueuedRequest', DELETED 1 'QueuedRequest'",
1346 "op": "db.sql.transaction",
1347 "origin": "auto.db.core_data"
1348 }"#;
1349
1350 let mut span = Annotated::<Span>::from_json(json).unwrap();
1351
1352 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1353
1354 assert_eq!(
1355 scrubbed.0.as_deref(),
1356 Some("UPDATED * 'QueuedRequest', DELETED * 'QueuedRequest'")
1357 );
1358 }
1359
1360 #[test]
1361 fn mongodb_scrubbing() {
1362 let json = r#"{
1363 "description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
1364 "op": "db",
1365 "data": {
1366 "db.system": "mongodb",
1367 "db.operation": "find",
1368 "db.collection.name": "documents"
1369 }
1370 }"#;
1371
1372 let mut span = Annotated::<Span>::from_json(json).unwrap();
1373
1374 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1375
1376 assert_eq!(
1377 scrubbed.0.as_deref(),
1378 Some(r#"{"find":"documents","foo":"?"}"#)
1379 )
1380 }
1381
1382 #[test]
1383 fn mongodb_with_legacy_collection_property() {
1384 let json = r#"{
1385 "description": "{\"find\": \"documents\", \"foo\": \"bar\"}",
1386 "op": "db",
1387 "data": {
1388 "db.system": "mongodb",
1389 "db.operation": "find",
1390 "db.mongodb.collection": "documents"
1391 }
1392 }"#;
1393
1394 let mut span = Annotated::<Span>::from_json(json).unwrap();
1395
1396 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1397
1398 assert_eq!(
1399 scrubbed.0.as_deref(),
1400 Some(r#"{"find":"documents","foo":"?"}"#)
1401 )
1402 }
1403
1404 #[test]
1405 fn ui_interaction_with_component_name() {
1406 let json = r#"{
1407 "description": "input.app-asdfasfg.asdfasdf[type=\"range\"][name=\"replay-timeline\"]",
1408 "op": "ui.interaction.click",
1409 "data": {
1410 "ui.component_name": "my-component-name"
1411 }
1412 }"#;
1413
1414 let mut span = Annotated::<Span>::from_json(json).unwrap();
1415
1416 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1417
1418 assert_eq!(scrubbed.0.as_deref(), Some("my-component-name"));
1420 }
1421
1422 #[test]
1423 fn scrub_allowed_host() {
1424 let examples = [
1425 (
1426 "https://foo.bar.internal/api/v1/submit",
1427 ["foo.bar.internal".to_owned()],
1428 "https://foo.bar.internal",
1429 ),
1430 (
1431 "http://192.168.1.1:3000",
1432 ["192.168.1.1".to_owned()],
1433 "http://192.168.1.1:3000",
1434 ),
1435 (
1436 "http://[1fff:0:a88:85a3::ac1f]:8001/foo",
1437 ["[1fff:0:a88:85a3::ac1f]".to_owned()],
1438 "http://[1fff:0:a88:85a3::ac1f]:8001",
1439 ),
1440 ];
1441
1442 for (url, allowed_hosts, expected) in examples {
1443 let json = format!(
1444 r#"{{
1445 "description": "POST {url}",
1446 "span_id": "bd2eb23da2beb459",
1447 "start_timestamp": 1597976393.4619668,
1448 "timestamp": 1597976393.4718769,
1449 "trace_id": "ff62a8b040f340bda5d830223def1d81",
1450 "op": "http.client"
1451 }}
1452 "#,
1453 );
1454
1455 let mut span = Annotated::<Span>::from_json(&json).unwrap();
1456
1457 let scrubbed =
1458 scrub_span_description(span.value_mut().as_mut().unwrap(), &allowed_hosts);
1459
1460 assert_eq!(
1461 scrubbed.0.as_deref(),
1462 Some(format!("POST {expected}").as_str()),
1463 "Could not match {url}"
1464 );
1465 }
1466 }
1467
1468 macro_rules! mongodb_scrubbing_test {
1469 ($name:ident, $description_in:expr, $operation_in:literal, $collection_in:literal, $expected:literal) => {
1474 #[test]
1475 fn $name() {
1476 let json = format!(
1477 r#"
1478 {{
1479 "description": "",
1480 "span_id": "bd2eb23da2beb459",
1481 "start_timestamp": 1597976393.4619668,
1482 "timestamp": 1597976393.4718769,
1483 "trace_id": "ff62a8b040f340bda5d830223def1d81",
1484 "op": "db",
1485 "data": {{
1486 "db.system": "mongodb",
1487 "db.operation": {},
1488 "db.collection.name": {}
1489 }}
1490 }}
1491 "#,
1492 if $operation_in == "" {
1493 "null".to_owned()
1494 } else {
1495 format!("\"{}\"", $operation_in)
1496 },
1497 if $collection_in == "" {
1498 "null".to_owned()
1499 } else {
1500 format!("\"{}\"", $collection_in)
1501 }
1502 );
1503
1504 let mut span = Annotated::<Span>::from_json(&json).unwrap();
1505 span.value_mut()
1506 .as_mut()
1507 .unwrap()
1508 .description
1509 .set_value(Some($description_in.into()));
1510
1511 let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]);
1512
1513 if $expected == "" {
1514 assert!(scrubbed.0.is_none());
1515 } else {
1516 assert_eq!($expected, scrubbed.0.unwrap());
1517 }
1518 }
1519 };
1520 }
1521
1522 mongodb_scrubbing_test!(
1523 mongodb_basic_query,
1524 r#"{"find": "documents", "showRecordId": true}"#,
1525 "find",
1526 "documents",
1527 r#"{"find":"documents","showRecordId":"?"}"#
1528 );
1529
1530 mongodb_scrubbing_test!(
1531 mongodb_query_with_document_param,
1532 r#"{"find": "documents", "filter": {"foo": "bar"}}"#,
1533 "find",
1534 "documents",
1535 r#"{"filter":{"foo":"?"},"find":"documents"}"#
1536 );
1537
1538 mongodb_scrubbing_test!(
1539 mongodb_query_without_operation,
1540 r#"{"filter": {"foo": "bar"}}"#,
1541 "find",
1542 "documents",
1543 r#"{"filter":{"foo":"?"},"find":"documents"}"#
1544 );
1545
1546 mongodb_scrubbing_test!(
1547 mongodb_without_collection_in_data,
1548 r#"{"find": "documents", "showRecordId": true}"#,
1549 "find",
1550 "",
1551 ""
1552 );
1553
1554 mongodb_scrubbing_test!(
1555 mongodb_without_operation_in_data,
1556 r#"{"find": "documents", "showRecordId": true}"#,
1557 "",
1558 "documents",
1559 ""
1560 );
1561
1562 mongodb_scrubbing_test!(
1563 mongodb_max_depth,
1564 r#"{"update": "coll", "updates": {"q": {"_id": "1"}, "u": {"$set": {"foo": {"bar": {"baz": "quux"}}}}}}"#,
1565 "update",
1566 "coll",
1567 r#"{"update":"coll","updates":{"q":{"_id":"?"},"u":{"$set":{"foo":"?"}}}}"#
1568 );
1569
1570 mongodb_scrubbing_test!(
1571 mongodb_identifier_in_collection,
1572 r#"{"find": "documents001", "showRecordId": true}"#,
1573 "find",
1574 "documents001",
1575 r#"{"find":"documents{%s}","showRecordId":"?"}"#
1576 );
1577
1578 mongodb_scrubbing_test!(
1579 mongodb_query_with_array,
1580 r#"{"insert": "documents", "documents": [{"foo": "bar"}, {"baz": "quux"}, {"qux": "quuz"}]}"#,
1581 "insert",
1582 "documents",
1583 r#"{"documents":["..."],"insert":"documents"}"#
1584 );
1585
1586 mongodb_scrubbing_test!(
1587 mongodb_query_with_buffer,
1588 r#"{"insert": "documents", "buffer": {"0": "a", "1": "b", "2": "c"}, "documents": [{"foo": "bar"}]}"#,
1589 "insert",
1590 "documents",
1591 r#"{"documents":["..."],"insert":"documents"}"#
1592 );
1593}