relay_monitors/
lib.rs

1//! Monitors protocol and processing for Sentry.
2//!
//! [Monitors] allow you to monitor the uptime and performance of any scheduled, recurring job in
//! Sentry. Once implemented, they allow you to get alerts and metrics to help you solve errors,
//! detect timeouts, and prevent disruptions to your service.
6//!
7//! # API
8//!
9//! The public API documentation is available on [Sentry Docs](https://docs.sentry.io/api/crons/).
10//!
11//! [monitors]: https://docs.sentry.io/product/crons/
12
13#![doc(
14    html_logo_url = "https://raw.githubusercontent.com/getsentry/relay/master/artwork/relay-icon.png",
15    html_favicon_url = "https://raw.githubusercontent.com/getsentry/relay/master/artwork/relay-icon.png"
16)]
17#![warn(missing_docs)]
18
19use std::sync::OnceLock;
20
21use relay_base_schema::project::ProjectId;
22use relay_event_schema::protocol::TraceId;
23use serde::{Deserialize, Serialize};
24use uuid::Uuid;
25
/// Maximum length of monitor slugs, counted in characters rather than bytes.
///
/// Longer slugs are truncated to this length by `trim_slug` instead of being rejected.
const SLUG_LENGTH: usize = 50;
28
/// Maximum length of environment names, counted in characters rather than bytes.
///
/// Check-ins with a longer environment are rejected by [`process_check_in`].
const ENVIRONMENT_LENGTH: usize = 64;
31
/// Error returned from [`process_check_in`].
#[derive(Debug, thiserror::Error)]
pub enum ProcessCheckInError {
    /// Failed to deserialize the payload.
    ///
    /// Through the `#[from]` conversion, this variant is also returned when serializing the
    /// normalized check-in back to JSON fails.
    #[error("failed to deserialize check in")]
    Json(#[from] serde_json::Error),

    /// Monitor slug was empty or missing from the payload.
    #[error("the monitor slug is empty or invalid")]
    EmptySlug,

    /// Environment name was invalid, that is, longer than the maximum environment length.
    #[error("the environment is invalid")]
    InvalidEnvironment,
}
47
/// Describes the status of the incoming CheckIn.
///
/// Variants are serialized in snake_case, for example `"in_progress"`.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CheckInStatus {
    /// Check-in had no issues during execution.
    Ok,
    /// Check-in failed or otherwise had some issues.
    Error,
    /// Check-in is expected to complete.
    InProgress,
    /// Monitor did not check in on time.
    ///
    /// This status cannot be ingested: [`process_check_in`] rewrites it to
    /// [`Unknown`](Self::Unknown).
    Missed,
    /// No recognized status was passed.
    ///
    /// Any status value that does not match another variant deserializes to this variant.
    #[serde(other)]
    Unknown,
}
64
/// Schedule of a monitor, as carried in the `schedule` field of [`MonitorConfig`].
///
/// Serialized as an internally tagged object: the `type` field holds the snake_case variant
/// name (`"crontab"` or `"interval"`) next to the variant's own fields.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type")]
enum Schedule {
    /// A crontab-style schedule string, for example `"0 * * * *"`.
    Crontab { value: String },
    /// A recurring interval of `value` times `unit`, for example every 5 days.
    Interval { value: u64, unit: IntervalName },
}
72
/// Unit of time used by [`Schedule::Interval`].
///
/// Serialized in snake_case, for example `"day"`.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
enum IntervalName {
    Year,
    Month,
    Week,
    Day,
    Hour,
    Minute,
}
83
/// The monitor configuration payload for upserting monitors during check-in.
///
/// Only `schedule` is required. All other fields are optional and are omitted from the
/// serialized output when they are `None`.
#[derive(Debug, Deserialize, Serialize)]
pub struct MonitorConfig {
    /// The monitor schedule configuration.
    schedule: Schedule,

    /// How long (in minutes) after the expected check-in time we wait until we consider the
    /// check-in to have been missed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    checkin_margin: Option<u64>,

    /// How long (in minutes) the check-in is allowed to run in `in_progress` before it is
    /// considered failed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    max_runtime: Option<u64>,

    /// A tz database style timezone string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    timezone: Option<String>,

    /// How many consecutive failed check-ins it takes to create an issue.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    failure_issue_threshold: Option<u64>,

    /// How many consecutive OK check-ins it takes to resolve an issue.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    recovery_threshold: Option<u64>,

    /// Who the owner of the monitor should be. Uses the ActorTuple [0]
    /// identifier format.
    ///
    /// [0]: https://github.com/getsentry/sentry/blob/3644f5c4f2a99073bf925181b5237a6e05c1d6c2/src/sentry/utils/actor.py#L17
    #[serde(default, skip_serializing_if = "Option::is_none")]
    owner: Option<String>,
}
119
/// The trace context sent with a check-in.
///
/// `trace_id` is required whenever this context is present.
#[derive(Debug, Deserialize, Serialize)]
pub struct CheckInTrace {
    /// Trace-ID of the check-in.
    trace_id: TraceId,
}
126
/// Any contexts sent in the check-in payload.
#[derive(Debug, Deserialize, Serialize)]
pub struct CheckInContexts {
    /// Trace context sent with a check-in.
    ///
    /// Optional; omitted from the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    trace: Option<CheckInTrace>,
}
134
/// Identifier of a single check-in.
///
/// A check-in ID is expected to have the same format as a trace ID, so the type is aliased
/// rather than redefined.
type CheckInId = TraceId;
137
/// The monitor check-in payload.
#[derive(Debug, Deserialize, Serialize)]
pub struct CheckIn {
    /// Unique identifier of this check-in.
    ///
    /// Falls back to the default [`CheckInId`] when absent from the payload.
    #[serde(default)]
    pub check_in_id: CheckInId,

    /// Identifier of the monitor for this check-in.
    ///
    /// Falls back to an empty string when absent from the payload, which [`process_check_in`]
    /// rejects.
    #[serde(default)]
    pub monitor_slug: String,

    /// Status of this check-in.
    ///
    /// This field is required. Unrecognized status values deserialize to `"unknown"`.
    pub status: CheckInStatus,

    /// The environment to associate the check-in with.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub environment: Option<String>,

    /// Duration of this check since it has started in seconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duration: Option<f64>,

    /// Monitor configuration to support upserts.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub monitor_config: Option<MonitorConfig>,

    /// Contexts describing the associated environment of the job run.
    /// Only supports trace for now.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub contexts: Option<CheckInContexts>,
}
169
170/// The result from calling process_check_in
171pub struct ProcessedCheckInResult {
172    /// The routing key to be used for the check-in payload.
173    ///
174    /// Important to help ensure monitor check-ins are processed in order by routing check-ins from
175    /// the same monitor to the same place.
176    pub routing_hint: Uuid,
177
178    /// The JSON payload of the processed check-in.
179    pub payload: Vec<u8>,
180}
181
182/// Normalizes a monitor check-in payload.
183pub fn process_check_in(
184    payload: &[u8],
185    project_id: ProjectId,
186) -> Result<ProcessedCheckInResult, ProcessCheckInError> {
187    let mut check_in = serde_json::from_slice::<CheckIn>(payload)?;
188
189    // Missed status cannot be ingested, this is computed on the server.
190    if check_in.status == CheckInStatus::Missed {
191        check_in.status = CheckInStatus::Unknown;
192    }
193
194    trim_slug(&mut check_in.monitor_slug);
195
196    if check_in.monitor_slug.is_empty() {
197        return Err(ProcessCheckInError::EmptySlug);
198    }
199
200    if check_in
201        .environment
202        .as_ref()
203        .is_some_and(|e| e.chars().count() > ENVIRONMENT_LENGTH)
204    {
205        return Err(ProcessCheckInError::InvalidEnvironment);
206    }
207
208    static NAMESPACE: OnceLock<Uuid> = OnceLock::new();
209    let namespace = NAMESPACE
210        .get_or_init(|| Uuid::new_v5(&Uuid::NAMESPACE_URL, b"https://sentry.io/crons/#did"));
211
212    // Use the project_id + monitor_slug as the routing key hint. This helps ensure monitor
213    // check-ins are processed in order by consistently routing check-ins from the same monitor.
214
215    let slug = &check_in.monitor_slug;
216    let project_id_slug_key = format!("{project_id}:{slug}");
217
218    let routing_hint = Uuid::new_v5(namespace, project_id_slug_key.as_bytes());
219
220    Ok(ProcessedCheckInResult {
221        routing_hint,
222        payload: serde_json::to_vec(&check_in)?,
223    })
224}
225
226fn trim_slug(slug: &mut String) {
227    if let Some((overflow, _)) = slug.char_indices().nth(SLUG_LENGTH) {
228        slug.truncate(overflow);
229    }
230}
231
#[cfg(test)]
mod tests {
    use similar_asserts::assert_eq;

    use super::*;

    /// Asserts that `json` parses as a [`CheckIn`] and pretty-prints back to the same text.
    fn assert_roundtrip(json: &str) {
        let check_in: CheckIn = serde_json::from_str(json).unwrap();
        let serialized = serde_json::to_string_pretty(&check_in).unwrap();

        assert_eq!(json, serialized);
    }

    #[test]
    fn truncate_basic() {
        // Single-byte characters.
        let mut ascii_slug = "test_".repeat(50);
        trim_slug(&mut ascii_slug);
        assert_eq!("test_test_test_test_test_test_test_test_test_test_", ascii_slug,);

        // Multi-byte characters must be counted as characters, not bytes.
        let mut emoji_slug = "🦀".repeat(SLUG_LENGTH + 10);
        trim_slug(&mut emoji_slug);
        assert_eq!("🦀".repeat(SLUG_LENGTH), emoji_slug);
    }

    #[test]
    fn serialize_json_roundtrip() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "environment": "production",
  "duration": 21.0,
  "contexts": {
    "trace": {
      "trace_id": "8f431b7aa08441bbbd5a0100fd91f9fe"
    }
  }
}"#,
        );
    }

    #[test]
    fn serialize_with_upsert_short() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "monitor_config": {
    "schedule": {
      "type": "crontab",
      "value": "0 * * * *"
    }
  }
}"#,
        );
    }

    #[test]
    fn serialize_with_upsert_interval() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "monitor_config": {
    "schedule": {
      "type": "interval",
      "value": 5,
      "unit": "day"
    },
    "checkin_margin": 5,
    "max_runtime": 10,
    "timezone": "America/Los_Angles",
    "failure_issue_threshold": 3,
    "recovery_threshold": 1
  }
}"#,
        );
    }

    #[test]
    fn serialize_with_upsert_full() {
        assert_roundtrip(
            r#"{
  "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
  "monitor_slug": "my-monitor",
  "status": "in_progress",
  "monitor_config": {
    "schedule": {
      "type": "crontab",
      "value": "0 * * * *"
    },
    "checkin_margin": 5,
    "max_runtime": 10,
    "timezone": "America/Los_Angles",
    "failure_issue_threshold": 3,
    "recovery_threshold": 1,
    "owner": "user:123"
  }
}"#,
        );
    }

    #[test]
    fn process_simple() {
        let json = r#"{"check_in_id":"a460c25ff2554577b920fcfacae4e5eb","monitor_slug":"my-monitor","status":"ok"}"#;

        let Ok(processed) = process_check_in(json.as_bytes(), ProjectId::new(1)) else {
            panic!("Failed to process check-in")
        };

        // The routing_hint should be consistent for the (project_id, monitor_slug)
        let expected_uuid = Uuid::parse_str("66e5c5fa-b1b9-5980-8d85-432c1874521a").unwrap();

        assert_eq!(String::from_utf8(processed.payload).unwrap(), json);
        assert_eq!(processed.routing_hint, expected_uuid);
    }

    #[test]
    fn process_empty_slug() {
        let json = r#"{
          "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
          "monitor_slug": "",
          "status": "in_progress"
        }"#;

        let error = process_check_in(json.as_bytes(), ProjectId::new(1)).err();
        assert!(matches!(error, Some(ProcessCheckInError::EmptySlug)));
    }

    #[test]
    fn process_invalid_environment() {
        let json = r#"{
          "check_in_id": "a460c25ff2554577b920fcfacae4e5eb",
          "monitor_slug": "test",
          "status": "in_progress",
          "environment": "1234567890123456789012345678901234567890123456789012345678901234567890"
        }"#;

        let error = process_check_in(json.as_bytes(), ProjectId::new(1)).err();
        assert!(matches!(
            error,
            Some(ProcessCheckInError::InvalidEnvironment)
        ));
    }
}
386}