Skip to main content

relay_sampling/
evaluation.rs

1//! Evaluation of dynamic sampling rules.
2
3use std::fmt;
4use std::num::ParseIntError;
5use std::ops::ControlFlow;
6
7use chrono::{DateTime, Utc};
8use rand::Rng;
9use rand::distr::StandardUniform;
10use rand_pcg::Pcg32;
11use relay_protocol::Getter;
12use serde::Serialize;
13use uuid::Uuid;
14
15use crate::config::{RuleId, SamplingRule, SamplingValue};
16
17/// Generates a pseudo random number by seeding the generator with the given id.
18///
19/// The return is deterministic, always generates the same number from the same id.
20fn pseudo_random_from_seed(seed: Uuid) -> f64 {
21    let seed_number = seed.as_u128();
22    let mut generator = Pcg32::new((seed_number >> 64) as u64, seed_number as u64);
23    generator.sample(StandardUniform)
24}
25
26/// State machine for dynamic sampling.
27#[derive(Debug)]
28pub struct SamplingEvaluator {
29    now: DateTime<Utc>,
30    rule_ids: Vec<RuleId>,
31    factor: f64,
32    minimum_sample_rate: Option<f64>,
33}
34
35impl SamplingEvaluator {
36    /// Constructs an evaluator.
37    pub fn new(now: DateTime<Utc>) -> Self {
38        Self {
39            now,
40            rule_ids: vec![],
41            factor: 1.0,
42            minimum_sample_rate: None,
43        }
44    }
45
46    /// Attempts to find a match for sampling rules using `ControlFlow`.
47    ///
48    /// This function returns a `ControlFlow` to provide control over the matching process.
49    ///
50    /// - `ControlFlow::Continue`: Indicates that matching is incomplete, and more rules can be evaluated.
51    ///    - This state occurs either if no active rules match the provided data, or if the matched rules
52    ///      are factors requiring a final sampling value.
53    ///    - The returned evaluator contains the state of the matched rules and the accumulated sampling factor.
54    ///    - If this value is returned and there are no more rules to evaluate, it should be interpreted as "no match."
55    ///
56    /// - `ControlFlow::Break`: Indicates that one or more rules have successfully matched.
57    pub fn match_rules<'a, I, G>(
58        mut self,
59        seed: Uuid,
60        instance: &G,
61        rules: I,
62    ) -> ControlFlow<SamplingMatch, Self>
63    where
64        G: Getter,
65        I: Iterator<Item = &'a SamplingRule>,
66    {
67        for rule in rules {
68            if !rule.time_range.contains(self.now) || !rule.condition.matches(instance) {
69                continue;
70            };
71
72            if let Some(sample_rate) = self.try_compute_sample_rate(rule) {
73                return ControlFlow::Break(SamplingMatch::new(sample_rate, seed, self.rule_ids));
74            };
75        }
76
77        ControlFlow::Continue(self)
78    }
79
80    /// Attempts to compute the sample rate for a given [`SamplingRule`].
81    ///
82    /// # Returns
83    ///
84    /// - `None` if the sampling rule is invalid, expired, or if the final sample rate has not been
85    ///   determined yet.
86    /// - `Some` if the computed sample rate should be applied directly.
87    fn try_compute_sample_rate(&mut self, rule: &SamplingRule) -> Option<f64> {
88        match rule.sampling_value {
89            SamplingValue::Factor { value } => {
90                self.factor *= rule.apply_decaying_fn(value, self.now)?;
91                self.rule_ids.push(rule.id);
92                None
93            }
94            SamplingValue::SampleRate { value } => {
95                let sample_rate = rule.apply_decaying_fn(value, self.now)?;
96                let minimum_sample_rate = self.minimum_sample_rate.unwrap_or(0.0);
97                let adjusted = (sample_rate.max(minimum_sample_rate) * self.factor).clamp(0.0, 1.0);
98
99                self.rule_ids.push(rule.id);
100                Some(adjusted)
101            }
102            SamplingValue::MinimumSampleRate { value } => {
103                if self.minimum_sample_rate.is_none() {
104                    self.minimum_sample_rate = Some(rule.apply_decaying_fn(value, self.now)?);
105                    self.rule_ids.push(rule.id);
106                }
107                None
108            }
109        }
110    }
111}
112
113fn sampling_match(sample_rate: f64, seed: Uuid) -> SamplingDecision {
114    if sample_rate <= 0.0 {
115        return SamplingDecision::Drop;
116    } else if sample_rate >= 1.0 {
117        return SamplingDecision::Keep;
118    }
119
120    let random_number = pseudo_random_from_seed(seed);
121    relay_log::trace!(
122        sample_rate,
123        random_number,
124        "applying dynamic sampling to matching event"
125    );
126
127    if random_number >= sample_rate {
128        relay_log::trace!("dropping event that matched the configuration");
129        SamplingDecision::Drop
130    } else {
131        relay_log::trace!("keeping event that matched the configuration");
132        SamplingDecision::Keep
133    }
134}
135
/// A sampling decision.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SamplingDecision {
    /// The item is sampled and should not be dropped.
    Keep,
    /// The item is not sampled and should be dropped.
    Drop,
}

impl SamplingDecision {
    /// Returns `true` if the sampling decision is [`Self::Keep`].
    pub fn is_keep(self) -> bool {
        matches!(self, Self::Keep)
    }

    /// Returns `true` if the sampling decision is [`Self::Drop`].
    pub fn is_drop(self) -> bool {
        matches!(self, Self::Drop)
    }

    /// Returns a string representation of the sampling decision.
    ///
    /// This is `"keep"` for [`Self::Keep`] and `"drop"` for [`Self::Drop`].
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Keep => "keep",
            Self::Drop => "drop",
        }
    }
}

/// Formats the decision as the string returned by [`SamplingDecision::as_str`].
impl fmt::Display for SamplingDecision {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` applies the caller's width, fill, alignment and precision flags, which a nested
        // `write!(f, "{}", ..)` would silently discard. Plain `{}` output is unchanged.
        f.pad(self.as_str())
    }
}
170
171/// Represents the specification for sampling an incoming event.
172#[derive(Clone, Debug, PartialEq)]
173pub struct SamplingMatch {
174    /// The sample rate to use for the incoming event.
175    sample_rate: f64,
176    /// The seed to feed to the random number generator which allows the same number to be
177    /// generated given the same seed.
178    ///
179    /// This is especially important for trace sampling, even though we can have inconsistent
180    /// traces due to multi-matching.
181    seed: Uuid,
182    /// The list of rule ids that have matched the incoming event and/or dynamic sampling context.
183    matched_rules: MatchedRuleIds,
184    /// Whether this sampling match results in the item getting sampled.
185    /// It's essentially a cache, as the value can be deterministically derived from
186    /// the sample rate and the seed.
187    decision: SamplingDecision,
188}
189
190impl SamplingMatch {
191    fn new(sample_rate: f64, seed: Uuid, matched_rules: Vec<RuleId>) -> Self {
192        let matched_rules = MatchedRuleIds(matched_rules);
193        let decision = sampling_match(sample_rate, seed);
194
195        Self {
196            sample_rate,
197            seed,
198            matched_rules,
199            decision,
200        }
201    }
202
203    /// Returns the sample rate.
204    pub fn sample_rate(&self) -> f64 {
205        self.sample_rate
206    }
207
208    /// Returns the matched rules for the sampling match.
209    ///
210    /// Takes ownership, useful if you don't need the [`SamplingMatch`] anymore
211    /// and you want to avoid allocations.
212    pub fn into_matched_rules(self) -> MatchedRuleIds {
213        self.matched_rules
214    }
215
216    /// Returns the sampling decision.
217    pub fn decision(&self) -> SamplingDecision {
218        self.decision
219    }
220}
221
222/// Represents a list of rule ids which is used for outcomes.
223#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
224pub struct MatchedRuleIds(pub Vec<RuleId>);
225
226impl MatchedRuleIds {
227    /// Parses `MatchedRuleIds` from a string with concatenated rule identifiers.
228    ///
229    /// The format it parses from is:
230    ///
231    /// ```text
232    /// rule_id_1,rule_id_2,...
233    /// ```
234    pub fn parse(value: &str) -> Result<MatchedRuleIds, ParseIntError> {
235        let mut rule_ids = vec![];
236
237        for rule_id in value.split(',') {
238            rule_ids.push(RuleId(rule_id.parse()?));
239        }
240
241        Ok(MatchedRuleIds(rule_ids))
242    }
243}
244
245impl fmt::Display for MatchedRuleIds {
246    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247        for (i, rule_id) in self.0.iter().enumerate() {
248            if i > 0 {
249                write!(f, ",")?;
250            }
251            write!(f, "{rule_id}")?;
252        }
253
254        Ok(())
255    }
256}
257
258#[cfg(test)]
259mod tests {
260    use chrono::TimeZone;
261    use relay_protocol::RuleCondition;
262    use similar_asserts::assert_eq;
263    use std::str::FromStr;
264    use uuid::Uuid;
265
266    use crate::DynamicSamplingContext;
267    use crate::config::{DecayingFunction, RuleType, TimeRange};
268    use crate::dsc::TraceUserContext;
269
270    use super::*;
271
272    /// Helper to extract the sampling match after evaluating rules.
273    fn get_sampling_match(rules: &[SamplingRule], instance: &impl Getter) -> SamplingMatch {
274        match SamplingEvaluator::new(Utc::now()).match_rules(
275            Uuid::default(),
276            instance,
277            rules.iter(),
278        ) {
279            ControlFlow::Break(sampling_match) => sampling_match,
280            ControlFlow::Continue(_) => panic!("no match found"),
281        }
282    }
283
284    fn evaluation_is_match(res: ControlFlow<SamplingMatch, SamplingEvaluator>) -> bool {
285        matches!(res, ControlFlow::Break(_))
286    }
287
288    /// Helper to check if certain rules are matched on.
289    fn matches_rule_ids(rule_ids: &[u32], rules: &[SamplingRule], instance: &impl Getter) -> bool {
290        let matched_rule_ids = MatchedRuleIds(rule_ids.iter().map(|num| RuleId(*num)).collect());
291        let sampling_match = get_sampling_match(rules, instance);
292        matched_rule_ids == sampling_match.matched_rules
293    }
294
295    /// Helper function to create a dsc with the provided getter-values set.
296    fn mocked_dsc_with_getter_values(
297        paths_and_values: Vec<(&str, &str)>,
298    ) -> DynamicSamplingContext {
299        let mut dsc = DynamicSamplingContext {
300            trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(),
301            public_key: "12345678123456781234567812345678".parse().unwrap(),
302            release: None,
303            environment: None,
304            transaction: None,
305            sample_rate: None,
306            user: TraceUserContext::default(),
307            replay_id: None,
308            sampled: None,
309            other: Default::default(),
310        };
311
312        for (path, value) in paths_and_values {
313            match path {
314                "trace.release" => dsc.release = Some(value.to_owned()),
315                "trace.environment" => dsc.environment = Some(value.to_owned()),
316                "trace.user.id" => value.clone_into(&mut dsc.user.user_id),
317                "trace.user.segment" => value.clone_into(&mut dsc.user.user_segment),
318                "trace.transaction" => dsc.transaction = Some(value.to_owned()),
319                "trace.replay_id" => dsc.replay_id = Some(Uuid::from_str(value).unwrap()),
320                _ => panic!("invalid path"),
321            }
322        }
323
324        dsc
325    }
326
327    fn is_match(now: DateTime<Utc>, rule: &SamplingRule, dsc: &DynamicSamplingContext) -> bool {
328        SamplingEvaluator::new(now)
329            .match_rules(Uuid::default(), dsc, std::iter::once(rule))
330            .is_break()
331    }
332
333    #[test]
334    fn test_sample_rate_compounding() {
335        let rules = simple_sampling_rules(vec![
336            (RuleCondition::all(), SamplingValue::Factor { value: 0.8 }),
337            (RuleCondition::all(), SamplingValue::Factor { value: 0.5 }),
338            (
339                RuleCondition::all(),
340                SamplingValue::SampleRate { value: 0.25 },
341            ),
342        ]);
343        let dsc = mocked_dsc_with_getter_values(vec![]);
344
345        // 0.8 * 0.5 * 0.25 == 0.1
346        assert_eq!(get_sampling_match(&rules, &dsc).sample_rate(), 0.1);
347    }
348
349    #[test]
350    fn test_minimum_sample_rate() {
351        let rules = simple_sampling_rules(vec![
352            (RuleCondition::all(), SamplingValue::Factor { value: 1.5 }),
353            (
354                RuleCondition::all(),
355                SamplingValue::MinimumSampleRate { value: 0.5 },
356            ),
357            // Only the first matching minimum is applied.
358            (
359                RuleCondition::all(),
360                SamplingValue::MinimumSampleRate { value: 1.0 },
361            ),
362            (
363                RuleCondition::all(),
364                SamplingValue::SampleRate { value: 0.05 },
365            ),
366        ]);
367        let dsc = mocked_dsc_with_getter_values(vec![]);
368
369        // max(0.05, 0.5) * 1.5 = 0.75
370        assert_eq!(get_sampling_match(&rules, &dsc).sample_rate(), 0.75);
371    }
372
373    fn mocked_sampling_rule() -> SamplingRule {
374        SamplingRule {
375            condition: RuleCondition::all(),
376            sampling_value: SamplingValue::SampleRate { value: 1.0 },
377            ty: RuleType::Trace,
378            id: RuleId(0),
379            time_range: Default::default(),
380            decaying_fn: Default::default(),
381        }
382    }
383
384    /// Helper function to quickly construct many rules with their condition and value, and a unique id,
385    /// so the caller can easily check which rules are matching.
386    fn simple_sampling_rules(vals: Vec<(RuleCondition, SamplingValue)>) -> Vec<SamplingRule> {
387        let mut vec = vec![];
388
389        for (i, val) in vals.into_iter().enumerate() {
390            let (condition, sampling_value) = val;
391            vec.push(SamplingRule {
392                condition,
393                sampling_value,
394                ty: RuleType::Trace,
395                id: RuleId(i as u32),
396                time_range: Default::default(),
397                decaying_fn: Default::default(),
398            });
399        }
400        vec
401    }
402
403    /// Checks that rules don't match if the time is outside the time range.
404    #[test]
405    fn test_expired_rules() {
406        let rule = SamplingRule {
407            condition: RuleCondition::all(),
408            sampling_value: SamplingValue::SampleRate { value: 1.0 },
409            ty: RuleType::Trace,
410            id: RuleId(0),
411            time_range: TimeRange {
412                start: Some(Utc.with_ymd_and_hms(1970, 10, 10, 0, 0, 0).unwrap()),
413                end: Some(Utc.with_ymd_and_hms(1970, 10, 12, 0, 0, 0).unwrap()),
414            },
415            decaying_fn: Default::default(),
416        };
417
418        let dsc = mocked_dsc_with_getter_values(vec![]);
419
420        // Baseline test.
421        let within_timerange = Utc.with_ymd_and_hms(1970, 10, 11, 0, 0, 0).unwrap();
422        let res = SamplingEvaluator::new(within_timerange).match_rules(
423            Uuid::default(),
424            &dsc,
425            [rule.clone()].iter(),
426        );
427        assert!(evaluation_is_match(res));
428
429        let before_timerange = Utc.with_ymd_and_hms(1969, 1, 1, 0, 0, 0).unwrap();
430        let res = SamplingEvaluator::new(before_timerange).match_rules(
431            Uuid::default(),
432            &dsc,
433            [rule.clone()].iter(),
434        );
435        assert!(!evaluation_is_match(res));
436
437        let after_timerange = Utc.with_ymd_and_hms(1971, 1, 1, 0, 0, 0).unwrap();
438        let res = SamplingEvaluator::new(after_timerange).match_rules(
439            Uuid::default(),
440            &dsc,
441            [rule].iter(),
442        );
443        assert!(!evaluation_is_match(res));
444    }
445
446    /// Checks that `SamplingValueEvaluator` correctly matches the right rules.
447    #[test]
448    fn test_condition_matching() {
449        let rules = simple_sampling_rules(vec![
450            (
451                RuleCondition::glob("trace.transaction", "*healthcheck*"),
452                SamplingValue::SampleRate { value: 1.0 },
453            ),
454            (
455                RuleCondition::glob("trace.environment", "*dev*"),
456                SamplingValue::SampleRate { value: 1.0 },
457            ),
458            (
459                RuleCondition::eq_ignore_case("trace.transaction", "raboof"),
460                SamplingValue::Factor { value: 1.0 },
461            ),
462            (
463                RuleCondition::glob("trace.release", "1.1.1")
464                    & RuleCondition::eq_ignore_case("trace.user.segment", "vip"),
465                SamplingValue::SampleRate { value: 1.0 },
466            ),
467            (
468                RuleCondition::eq_ignore_case("trace.release", "1.1.1")
469                    & RuleCondition::eq_ignore_case("trace.environment", "prod"),
470                SamplingValue::Factor { value: 1.0 },
471            ),
472            (
473                RuleCondition::all(),
474                SamplingValue::SampleRate { value: 1.0 },
475            ),
476        ]);
477
478        // early return of first rule
479        let dsc = mocked_dsc_with_getter_values(vec![("trace.transaction", "foohealthcheckbar")]);
480        assert!(matches_rule_ids(&[0], &rules, &dsc));
481
482        // early return of second rule
483        let dsc = mocked_dsc_with_getter_values(vec![("trace.environment", "dev")]);
484        assert!(matches_rule_ids(&[1], &rules, &dsc));
485
486        // factor match third rule and early return sixth rule
487        let dsc = mocked_dsc_with_getter_values(vec![("trace.transaction", "raboof")]);
488        assert!(matches_rule_ids(&[2, 5], &rules, &dsc));
489
490        // factor match third rule and early return fourth rule
491        let dsc = mocked_dsc_with_getter_values(vec![
492            ("trace.transaction", "raboof"),
493            ("trace.release", "1.1.1"),
494            ("trace.user.segment", "vip"),
495        ]);
496        assert!(matches_rule_ids(&[2, 3], &rules, &dsc));
497
498        // factor match third, fifth rule and early return sixth rule
499        let dsc = mocked_dsc_with_getter_values(vec![
500            ("trace.transaction", "raboof"),
501            ("trace.release", "1.1.1"),
502            ("trace.environment", "prod"),
503        ]);
504        assert!(matches_rule_ids(&[2, 4, 5], &rules, &dsc));
505
506        // factor match fifth and early return sixth rule
507        let dsc = mocked_dsc_with_getter_values(vec![
508            ("trace.release", "1.1.1"),
509            ("trace.environment", "prod"),
510        ]);
511        assert!(matches_rule_ids(&[4, 5], &rules, &dsc));
512    }
513
514    #[test]
515    /// Test that we get the same sampling decision from the same trace id
516    fn test_repeatable_seed() {
517        let val1 = pseudo_random_from_seed(Uuid::default());
518        let val2 = pseudo_random_from_seed(Uuid::default());
519        assert!(val1 + f64::EPSILON > val2 && val2 + f64::EPSILON > val1);
520    }
521
522    #[test]
523    /// Tests if the MatchedRuleIds struct is displayed correctly as string.
524    fn matched_rule_ids_display() {
525        let matched_rule_ids = MatchedRuleIds(vec![RuleId(123), RuleId(456)]);
526        assert_eq!(matched_rule_ids.to_string(), "123,456");
527
528        let matched_rule_ids = MatchedRuleIds(vec![RuleId(123)]);
529        assert_eq!(matched_rule_ids.to_string(), "123");
530
531        let matched_rule_ids = MatchedRuleIds(vec![]);
532        assert_eq!(matched_rule_ids.to_string(), "")
533    }
534
535    #[test]
536    /// Tests if the MatchRuleIds struct is created correctly from its string representation.
537    fn matched_rule_ids_parse() {
538        assert_eq!(
539            MatchedRuleIds::parse("123,456"),
540            Ok(MatchedRuleIds(vec![RuleId(123), RuleId(456)]))
541        );
542
543        assert_eq!(
544            MatchedRuleIds::parse("123"),
545            Ok(MatchedRuleIds(vec![RuleId(123)]))
546        );
547
548        assert!(MatchedRuleIds::parse("").is_err());
549
550        assert!(MatchedRuleIds::parse(",").is_err());
551
552        assert!(MatchedRuleIds::parse("123.456").is_err());
553
554        assert!(MatchedRuleIds::parse("a,b").is_err());
555    }
556
557    #[test]
558    /// Tests that no match is done when there are no matching rules.
559    fn test_get_sampling_match_result_with_no_match() {
560        let dsc = mocked_dsc_with_getter_values(vec![]);
561
562        let res = SamplingEvaluator::new(Utc::now()).match_rules(Uuid::default(), &dsc, [].iter());
563
564        assert!(!evaluation_is_match(res));
565    }
566
567    /// Validates the early return (and hence no match) of the `match_rules` function if the current
568    /// time is out of bounds of the time range.
569    /// When the `start` or `end` of the range is missing, it defaults to always include
570    /// times before the `end` or after the `start`, respectively.
571    #[test]
572    fn test_sample_rate_valid_time_range() {
573        let dsc = mocked_dsc_with_getter_values(vec![]);
574        let time_range = TimeRange {
575            start: Some(Utc.with_ymd_and_hms(1970, 1, 1, 0, 0, 0).unwrap()),
576            end: Some(Utc.with_ymd_and_hms(1980, 1, 1, 0, 0, 0).unwrap()),
577        };
578
579        let before_time_range = Utc.with_ymd_and_hms(1969, 1, 1, 0, 0, 0).unwrap();
580        let during_time_range = Utc.with_ymd_and_hms(1975, 1, 1, 0, 0, 0).unwrap();
581        let after_time_range = Utc.with_ymd_and_hms(1981, 1, 1, 0, 0, 0).unwrap();
582
583        let rule = SamplingRule {
584            condition: RuleCondition::all(),
585            sampling_value: SamplingValue::SampleRate { value: 1.0 },
586            ty: RuleType::Trace,
587            id: RuleId(0),
588            time_range,
589            decaying_fn: DecayingFunction::Constant,
590        };
591
592        // [start..end]
593        assert!(!is_match(before_time_range, &rule, &dsc));
594        assert!(is_match(during_time_range, &rule, &dsc));
595        assert!(!is_match(after_time_range, &rule, &dsc));
596
597        // [start..]
598        let mut rule_without_end = rule.clone();
599        rule_without_end.time_range.end = None;
600        assert!(!is_match(before_time_range, &rule_without_end, &dsc));
601        assert!(is_match(during_time_range, &rule_without_end, &dsc));
602        assert!(is_match(after_time_range, &rule_without_end, &dsc));
603
604        // [..end]
605        let mut rule_without_start = rule.clone();
606        rule_without_start.time_range.start = None;
607        assert!(is_match(before_time_range, &rule_without_start, &dsc));
608        assert!(is_match(during_time_range, &rule_without_start, &dsc));
609        assert!(!is_match(after_time_range, &rule_without_start, &dsc));
610
611        // [..]
612        let mut rule_without_range = rule.clone();
613        rule_without_range.time_range = TimeRange::default();
614        assert!(is_match(before_time_range, &rule_without_range, &dsc));
615        assert!(is_match(during_time_range, &rule_without_range, &dsc));
616        assert!(is_match(after_time_range, &rule_without_range, &dsc));
617    }
618
619    /// Checks that `validate_match` yields the correct controlflow given the SamplingValue variant.
620    #[test]
621    fn test_validate_match() {
622        let mut rule = mocked_sampling_rule();
623        let mut eval = SamplingEvaluator::new(Utc::now());
624
625        rule.sampling_value = SamplingValue::SampleRate { value: 1.0 };
626        assert_eq!(eval.try_compute_sample_rate(&rule), Some(1.0));
627
628        rule.sampling_value = SamplingValue::Factor { value: 1.0 };
629        assert_eq!(eval.try_compute_sample_rate(&rule), None);
630
631        rule.sampling_value = SamplingValue::MinimumSampleRate { value: 1.0 };
632        assert_eq!(eval.try_compute_sample_rate(&rule), None);
633    }
634}