relay_monitors/
lib.rs

1//! Monitors protocol and processing for Sentry.
2//!
//! [Monitors] allow you to monitor the uptime and performance of any scheduled, recurring job in
//! Sentry. Once set up, they provide alerts and metrics that help you solve errors, detect
//! timeouts, and prevent disruptions to your service.
6//!
7//! # API
8//!
9//! The public API documentation is available on [Sentry Docs](https://docs.sentry.io/api/crons/).
10//!
11//! [monitors]: https://docs.sentry.io/product/crons/
12
13#![doc(
14    html_logo_url = "https://raw.githubusercontent.com/getsentry/relay/master/artwork/relay-icon.png",
15    html_favicon_url = "https://raw.githubusercontent.com/getsentry/relay/master/artwork/relay-icon.png"
16)]
17#![warn(missing_docs)]
18
19use std::sync::OnceLock;
20
21use relay_base_schema::project::ProjectId;
22use relay_event_schema::protocol::{EventId, TraceId};
23use serde::{Deserialize, Serialize};
24use uuid::Uuid;
25
/// Maximum length of monitor slugs, counted in `char`s rather than bytes.
///
/// Longer slugs are cut down to this many characters by `trim_slug`.
const SLUG_LENGTH: usize = 50;

/// Maximum length of environment names, counted in `char`s rather than bytes.
///
/// Check-ins whose environment exceeds this limit are rejected by `process_check_in`.
const ENVIRONMENT_LENGTH: usize = 64;
31
32/// Error returned from [`process_check_in`].
33#[derive(Debug, thiserror::Error)]
34pub enum ProcessCheckInError {
35    /// Failed to deserialize the payload.
36    #[error("failed to deserialize check in")]
37    Json(#[from] serde_json::Error),
38
39    /// Monitor slug was empty after slugification.
40    #[error("the monitor slug is empty or invalid")]
41    EmptySlug,
42
43    /// Environment name was invalid.
44    #[error("the environment is invalid")]
45    InvalidEnvironment,
46}
47
48/// Describes the status of the incoming CheckIn.
49#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
50#[serde(rename_all = "snake_case")]
51pub enum CheckInStatus {
52    /// Check-in had no issues during execution.
53    Ok,
54    /// Check-in failed or otherwise had some issues.
55    Error,
56    /// Check-in is expectred to complete.
57    InProgress,
58    /// Monitor did not check in on time.
59    Missed,
60    /// No status was passed.
61    #[serde(other)]
62    Unknown,
63}
64
65#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
66#[serde(rename_all = "snake_case")]
67#[serde(tag = "type")]
68enum Schedule {
69    Crontab { value: String },
70    Interval { value: u64, unit: IntervalName },
71}
72
73#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
74#[serde(rename_all = "snake_case")]
75enum IntervalName {
76    Year,
77    Month,
78    Week,
79    Day,
80    Hour,
81    Minute,
82}
83
84/// The monitor configuration payload for upserting monitors during check-in
85#[derive(Debug, Deserialize, Serialize)]
86pub struct MonitorConfig {
87    /// The monitor schedule configuration
88    schedule: Schedule,
89
90    /// How long (in minutes) after the expected checkin time will we wait until we consider the
91    /// checkin to have been missed.
92    #[serde(default, skip_serializing_if = "Option::is_none")]
93    checkin_margin: Option<u64>,
94
95    /// How long (in minutes) is the check-in allowed to run for in in_progress before it is
96    /// considered failed.
97    #[serde(default, skip_serializing_if = "Option::is_none")]
98    max_runtime: Option<u64>,
99
100    /// tz database style timezone string
101    #[serde(default, skip_serializing_if = "Option::is_none")]
102    timezone: Option<String>,
103
104    /// How many consecutive failed check-ins it takes to create an issue.
105    #[serde(default, skip_serializing_if = "Option::is_none")]
106    failure_issue_threshold: Option<u64>,
107
108    /// How many consecutive OK check-ins it takes to resolve an issue.
109    #[serde(default, skip_serializing_if = "Option::is_none")]
110    recovery_threshold: Option<u64>,
111
112    /// Who the owner of the monitor should be. Uses the ActorTuple [0]
113    /// identifier format.
114    ///
115    /// [0]: https://github.com/getsentry/sentry/blob/3644f5c4f2a99073bf925181b5237a6e05c1d6c2/src/sentry/utils/actor.py#L17
116    #[serde(default, skip_serializing_if = "Option::is_none")]
117    owner: Option<String>,
118}
119
120/// The trace context sent with a check-in.
121#[derive(Debug, Deserialize, Serialize)]
122pub struct CheckInTrace {
123    /// Trace-ID of the check-in.
124    trace_id: TraceId,
125}
126
127/// Any contexts sent in the check-in payload.
128#[derive(Debug, Deserialize, Serialize)]
129pub struct CheckInContexts {
130    /// Trace context sent with a check-in.
131    #[serde(default, skip_serializing_if = "Option::is_none")]
132    trace: Option<CheckInTrace>,
133}
134
135/// The monitor check-in payload.
136#[derive(Debug, Deserialize, Serialize)]
137pub struct CheckIn {
138    /// Unique identifier of this check-in.
139    #[serde(default = "EventId::nil")]
140    pub check_in_id: EventId,
141
142    /// Identifier of the monitor for this check-in.
143    #[serde(default)]
144    pub monitor_slug: String,
145
146    /// Status of this check-in. Defaults to `"unknown"`.
147    pub status: CheckInStatus,
148
149    /// The environment to associate the check-in with
150    #[serde(default, skip_serializing_if = "Option::is_none")]
151    pub environment: Option<String>,
152
153    /// Duration of this check since it has started in seconds.
154    #[serde(default, skip_serializing_if = "Option::is_none")]
155    pub duration: Option<f64>,
156
157    /// monitor configuration to support upserts.
158    #[serde(default, skip_serializing_if = "Option::is_none")]
159    pub monitor_config: Option<MonitorConfig>,
160
161    /// Contexts describing the associated environment of the job run.
162    /// Only supports trace for now.
163    #[serde(default, skip_serializing_if = "Option::is_none")]
164    pub contexts: Option<CheckInContexts>,
165}
166
167/// The result from calling process_check_in
168pub struct ProcessedCheckInResult {
169    /// The routing key to be used for the check-in payload.
170    ///
171    /// Important to help ensure monitor check-ins are processed in order by routing check-ins from
172    /// the same monitor to the same place.
173    pub routing_hint: Uuid,
174
175    /// The JSON payload of the processed check-in.
176    pub payload: Vec<u8>,
177}
178
179/// Normalizes a monitor check-in payload.
180pub fn process_check_in(
181    payload: &[u8],
182    project_id: ProjectId,
183) -> Result<ProcessedCheckInResult, ProcessCheckInError> {
184    let mut check_in = serde_json::from_slice::<CheckIn>(payload)?;
185
186    // Missed status cannot be ingested, this is computed on the server.
187    if check_in.status == CheckInStatus::Missed {
188        check_in.status = CheckInStatus::Unknown;
189    }
190
191    trim_slug(&mut check_in.monitor_slug);
192
193    if check_in.monitor_slug.is_empty() {
194        return Err(ProcessCheckInError::EmptySlug);
195    }
196
197    if check_in
198        .environment
199        .as_ref()
200        .is_some_and(|e| e.chars().count() > ENVIRONMENT_LENGTH)
201    {
202        return Err(ProcessCheckInError::InvalidEnvironment);
203    }
204
205    static NAMESPACE: OnceLock<Uuid> = OnceLock::new();
206    let namespace = NAMESPACE
207        .get_or_init(|| Uuid::new_v5(&Uuid::NAMESPACE_URL, b"https://sentry.io/crons/#did"));
208
209    // Use the project_id + monitor_slug as the routing key hint. This helps ensure monitor
210    // check-ins are processed in order by consistently routing check-ins from the same monitor.
211
212    let slug = &check_in.monitor_slug;
213    let project_id_slug_key = format!("{project_id}:{slug}");
214
215    let routing_hint = Uuid::new_v5(namespace, project_id_slug_key.as_bytes());
216
217    Ok(ProcessedCheckInResult {
218        routing_hint,
219        payload: serde_json::to_vec(&check_in)?,
220    })
221}
222
223fn trim_slug(slug: &mut String) {
224    if let Some((overflow, _)) = slug.char_indices().nth(SLUG_LENGTH) {
225        slug.truncate(overflow);
226    }
227}
228
#[cfg(test)]
mod tests {
    use similar_asserts::assert_eq;

    use super::*;

    /// Parses `json` into a [`CheckIn`] and asserts that pretty-printing it again reproduces
    /// the input exactly.
    #[track_caller]
    fn assert_roundtrip(json: &str) {
        let check_in: CheckIn = serde_json::from_str(json).unwrap();
        let serialized = serde_json::to_string_pretty(&check_in).unwrap();

        assert_eq!(json, serialized);
    }

    /// Slugs are cut after `SLUG_LENGTH` characters, for ASCII and multi-byte input alike.
    #[test]
    fn truncate_basic() {
        let mut ascii = "test_".repeat(50);
        trim_slug(&mut ascii);
        assert_eq!("test_".repeat(10), ascii);

        let mut crabs = "🦀".repeat(SLUG_LENGTH + 10);
        trim_slug(&mut crabs);
        assert_eq!("🦀".repeat(SLUG_LENGTH), crabs);
    }

    /// A check-in with environment, duration and trace context survives a round trip.
    #[test]
    fn serialize_json_roundtrip() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "environment": "production",
  "duration": 21.0,
  "contexts": {
    "trace": {
      "trace_id": "8f431b7aa08441bbbd5a0100fd91f9fe"
    }
  }
}"#,
        );
    }

    /// A monitor config that only carries a crontab schedule survives a round trip.
    #[test]
    fn serialize_with_upsert_short() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "monitor_config": {
    "schedule": {
      "type": "crontab",
      "value": "0 * * * *"
    }
  }
}"#,
        );
    }

    /// A monitor config with an interval schedule survives a round trip.
    #[test]
    fn serialize_with_upsert_interval() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "monitor_config": {
    "schedule": {
      "type": "interval",
      "value": 5,
      "unit": "day"
    },
    "checkin_margin": 5,
    "max_runtime": 10,
    "timezone": "America/Los_Angles",
    "failure_issue_threshold": 3,
    "recovery_threshold": 1
  }
}"#,
        );
    }

    /// A monitor config with every optional field set survives a round trip.
    #[test]
    fn serialize_with_upsert_full() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "monitor_config": {
    "schedule": {
      "type": "crontab",
      "value": "0 * * * *"
    },
    "checkin_margin": 5,
    "max_runtime": 10,
    "timezone": "America/Los_Angles",
    "failure_issue_threshold": 3,
    "recovery_threshold": 1,
    "owner": "user:123"
  }
}"#,
        );
    }

    /// A minimal check-in is passed through unchanged and receives a stable routing hint.
    #[test]
    fn process_simple() {
        let json = r#"{"check_in_id":"a460c25ff2554577b920fcfacae4e5eb","monitor_slug":"my-monitor","status":"ok"}"#;

        // The routing_hint should be consistent for the (project_id, monitor_slug)
        let expected_uuid = Uuid::parse_str("66e5c5fa-b1b9-5980-8d85-432c1874521a").unwrap();

        let Ok(processed) = process_check_in(json.as_bytes(), ProjectId::new(1)) else {
            panic!("Failed to process check-in")
        };

        assert_eq!(String::from_utf8(processed.payload).unwrap(), json);
        assert_eq!(processed.routing_hint, expected_uuid);
    }

    /// An empty monitor slug is rejected.
    #[test]
    fn process_empty_slug() {
        let json = r#"{
          "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
          "monitor_slug": "",
          "status": "in_progress"
        }"#;

        let outcome = process_check_in(json.as_bytes(), ProjectId::new(1));

        assert!(matches!(outcome, Err(ProcessCheckInError::EmptySlug)));
    }

    /// An environment longer than `ENVIRONMENT_LENGTH` characters is rejected.
    #[test]
    fn process_invalid_environment() {
        let json = r#"{
          "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
          "monitor_slug": "test",
          "status": "in_progress",
          "environment": "1234567890123456789012345678901234567890123456789012345678901234567890"
        }"#;

        let outcome = process_check_in(json.as_bytes(), ProjectId::new(1));

        assert!(matches!(
            outcome,
            Err(ProcessCheckInError::InvalidEnvironment)
        ));
    }
}