relay_server/statsd.rs
use relay_statsd::{CounterMetric, DistributionMetric, GaugeMetric, TimerMetric};
#[cfg(doc)]
use relay_system::RuntimeMetrics;

/// Gauge metrics used by Relay
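///
/// # Example
///
/// A minimal sketch of how one of these gauges might be reported. The `relay_statsd::metric!`
/// invocation style and the `disk_used` value are illustrative assumptions, not taken from
/// this file:
///
/// ```ignore
/// let disk_used: u64 = 1024;
/// relay_statsd::metric!(gauge(RelayGauges::BufferDiskUsed) = disk_used);
/// ```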
pub enum RelayGauges {
    /// Tracks the number of futures waiting to be executed in the pool's queue.
    ///
    /// Useful for understanding the backlog of work and identifying potential bottlenecks.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolQueueSize,
    /// Tracks the utilization of the async pool.
    ///
    /// The utilization is a value between 0.0 and 100.0 which indicates how busy the pool is
    /// with CPU-bound work.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolUtilization,
    /// Tracks the activity of the async pool.
    ///
    /// The activity is a value between 0.0 and 100.0 which indicates how busy the pool is
    /// relative to its provisioned capacity.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolActivity,
    /// The state of Relay with respect to the upstream connection.
    /// Possible values are `0` for normal operations and `1` for a network outage.
    NetworkOutage,
    /// The number of individual stacks in the priority queue.
    ///
    /// Per combination of `(own_key, sampling_key)`, a new stack is created.
    BufferStackCount,
    /// The disk space used by the buffer.
    BufferDiskUsed,
    /// The memory currently used by the entire system.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryUsed,
    /// The total system memory.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryTotal,
    /// The number of connections currently being managed by the Redis Pool.
    #[cfg(feature = "processing")]
    RedisPoolConnections,
    /// The number of idle connections in the Redis Pool.
    #[cfg(feature = "processing")]
    RedisPoolIdleConnections,
    /// The maximum number of connections in the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolMaxConnections,
    /// The number of futures waiting to grab a connection.
    #[cfg(feature = "processing")]
    RedisPoolWaitingForConnection,
    /// The number of notifications in the broadcast channel of the project cache.
    ProjectCacheNotificationChannel,
    /// The number of scheduled and in-progress fetches in the project cache.
    ProjectCacheScheduledFetches,
    /// Exposes the number of connections currently open and handled by the server.
    ServerActiveConnections,
    /// Maximum delay of a metric bucket in seconds.
    ///
    /// The maximum is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayMax,
    /// Estimated percentage [0-100] of how busy Relay's internal services are.
    ///
    /// This metric is tagged with:
    /// - `service`: the service name.
    /// - `instance_id`: an identifier for the running service instance, unique per service name.
    ServiceUtilization,
    /// Number of attachment uploads currently in flight.
    #[cfg(feature = "processing")]
    ConcurrentAttachmentUploads,
}

impl GaugeMetric for RelayGauges {
    fn name(&self) -> &'static str {
        match self {
            RelayGauges::AsyncPoolQueueSize => "async_pool.queue_size",
            RelayGauges::AsyncPoolUtilization => "async_pool.utilization",
            RelayGauges::AsyncPoolActivity => "async_pool.activity",
            RelayGauges::NetworkOutage => "upstream.network_outage",
            RelayGauges::BufferStackCount => "buffer.stack_count",
            RelayGauges::BufferDiskUsed => "buffer.disk_used",
            RelayGauges::SystemMemoryUsed => "health.system_memory.used",
            RelayGauges::SystemMemoryTotal => "health.system_memory.total",
            #[cfg(feature = "processing")]
            RelayGauges::RedisPoolConnections => "redis.pool.connections",
            #[cfg(feature = "processing")]
            RelayGauges::RedisPoolIdleConnections => "redis.pool.idle_connections",
            #[cfg(feature = "processing")]
            RelayGauges::RedisPoolMaxConnections => "redis.pool.max_connections",
            #[cfg(feature = "processing")]
            RelayGauges::RedisPoolWaitingForConnection => "redis.pool.waiting_for_connection",
            RelayGauges::ProjectCacheNotificationChannel => {
                "project_cache.notification_channel.size"
            }
            RelayGauges::ProjectCacheScheduledFetches => "project_cache.fetches.size",
            RelayGauges::ServerActiveConnections => "server.http.connections",
            #[cfg(feature = "processing")]
            RelayGauges::MetricDelayMax => "metrics.delay.max",
            RelayGauges::ServiceUtilization => "service.utilization",
            #[cfg(feature = "processing")]
            RelayGauges::ConcurrentAttachmentUploads => "attachment.upload.concurrent",
        }
    }
}

/// Gauge metrics collected from the Runtime.
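///
/// # Example
///
/// A sketch of how a worker-tagged runtime gauge could be reported; the macro invocation,
/// the `depth` value, and the `worker_id` tag value are illustrative assumptions:
///
/// ```ignore
/// relay_statsd::metric!(
///     gauge(RuntimeGauges::WorkerLocalQueueDepth) = depth,
///     worker = &worker_id.to_string()
/// );
/// ```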
pub enum RuntimeGauges {
    /// Exposes [`RuntimeMetrics::num_idle_threads`].
    NumIdleThreads,
    /// Exposes [`RuntimeMetrics::num_alive_tasks`].
    NumAliveTasks,
    /// Exposes [`RuntimeMetrics::blocking_queue_depth`].
    BlockingQueueDepth,
    /// Exposes [`RuntimeMetrics::num_blocking_threads`].
    NumBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_idle_blocking_threads`].
    NumIdleBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_workers`].
    NumWorkers,
    /// Exposes [`RuntimeMetrics::worker_local_queue_depth`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalQueueDepth,
    /// Exposes [`RuntimeMetrics::worker_mean_poll_time`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerMeanPollTime,
}

impl GaugeMetric for RuntimeGauges {
    fn name(&self) -> &'static str {
        match self {
            RuntimeGauges::NumIdleThreads => "runtime.idle_threads",
            RuntimeGauges::NumAliveTasks => "runtime.alive_tasks",
            RuntimeGauges::BlockingQueueDepth => "runtime.blocking_queue_depth",
            RuntimeGauges::NumBlockingThreads => "runtime.num_blocking_threads",
            RuntimeGauges::NumIdleBlockingThreads => "runtime.num_idle_blocking_threads",
            RuntimeGauges::NumWorkers => "runtime.num_workers",
            RuntimeGauges::WorkerLocalQueueDepth => "runtime.worker_local_queue_depth",
            RuntimeGauges::WorkerMeanPollTime => "runtime.worker_mean_poll_time",
        }
    }
}

/// Counter metrics collected from the Runtime.
pub enum RuntimeCounters {
    /// Exposes [`RuntimeMetrics::budget_forced_yield_count`].
    BudgetForcedYieldCount,
    /// Exposes [`RuntimeMetrics::worker_local_schedule_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalScheduleCount,
    /// Exposes [`RuntimeMetrics::worker_noop_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerNoopCount,
    /// Exposes [`RuntimeMetrics::worker_overflow_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerOverflowCount,
    /// Exposes [`RuntimeMetrics::worker_park_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerParkCount,
    /// Exposes [`RuntimeMetrics::worker_poll_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerPollCount,
    /// Exposes [`RuntimeMetrics::worker_steal_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealCount,
    /// Exposes [`RuntimeMetrics::worker_steal_operations`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealOperations,
    /// Exposes [`RuntimeMetrics::worker_total_busy_duration`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerTotalBusyDuration,
}

impl CounterMetric for RuntimeCounters {
    fn name(&self) -> &'static str {
        match self {
            RuntimeCounters::BudgetForcedYieldCount => "runtime.budget_forced_yield_count",
            RuntimeCounters::WorkerLocalScheduleCount => "runtime.worker_local_schedule_count",
            RuntimeCounters::WorkerNoopCount => "runtime.worker_noop_count",
            RuntimeCounters::WorkerOverflowCount => "runtime.worker_overflow_count",
            RuntimeCounters::WorkerParkCount => "runtime.worker_park_count",
            RuntimeCounters::WorkerPollCount => "runtime.worker_poll_count",
            RuntimeCounters::WorkerStealCount => "runtime.worker_steal_count",
            RuntimeCounters::WorkerStealOperations => "runtime.worker_steal_operations",
            RuntimeCounters::WorkerTotalBusyDuration => "runtime.worker_total_busy_duration",
        }
    }
}

/// Distribution metrics used by Relay.
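///
/// # Example
///
/// A sketch of how a distribution value might be recorded. It assumes the
/// `relay_statsd::metric!` macro has a `distribution(..)` arm analogous to `gauge(..)` and
/// `counter(..)`, and `span_count` is an illustrative value:
///
/// ```ignore
/// relay_statsd::metric!(distribution(RelayDistributions::EventSpans) = span_count);
/// ```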
pub enum RelayDistributions {
    /// The number of bytes received by Relay for each individual envelope item type.
    ///
    /// This metric is tagged with:
    /// - `item_type`: The type of the items being counted.
    /// - `is_container`: Whether this item is a container holding multiple items.
    EnvelopeItemSize,

    /// Number of elements in the envelope buffer across all the stacks.
    ///
    /// This metric is tagged with:
    /// - `storage_type`: The type of storage used in the envelope buffer.
    BufferEnvelopesCount,
    /// The number of bytes in the item payloads of an envelope pushed to the envelope buffer.
    ///
    /// This is not quite the same as the actual size of a serialized envelope, because it ignores
    /// the envelope header and item headers.
    BufferEnvelopeBodySize,
    /// Size of a serialized envelope pushed to the envelope buffer.
    BufferEnvelopeSize,
    /// Size of a compressed envelope pushed to the envelope buffer.
    BufferEnvelopeSizeCompressed,
    /// The number of batches emitted per partition.
    BatchesPerPartition,
    /// The number of buckets in a batch emitted.
    ///
    /// This corresponds to the number of buckets that will end up in an envelope.
    BucketsPerBatch,
    /// The number of spans per processed transaction event.
    ///
    /// This metric is tagged with:
    /// - `platform`: The event's platform, such as `"javascript"`.
    /// - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    EventSpans,
    /// Number of projects in the in-memory project cache that are waiting for their state to be
    /// updated.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStatePending,
    /// Number of project states **requested** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// The batch size can be configured with `cache.batch_size`. See `project_cache.size` for more
    /// description of the project cache.
    ProjectStateRequestBatchSize,
    /// Number of project states **returned** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStateReceived,
    /// Number of attempts required to fetch the config for a given project key.
    ProjectStateAttempts,
    /// Number of project states currently held in the in-memory project cache.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    /// - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///   automatically refreshed if a request references the project after it has expired.
    /// - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///   be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///   requests wait for an update.
    ///
    /// There is no limit to the number of cached projects.
    ProjectStateCacheSize,
    /// The size of the compressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesCompressed,
    /// The size of the uncompressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesDecompressed,
    /// The number of upstream requests queued up for sending.
    ///
    /// Relay employs connection keep-alive whenever possible. Connections are kept open for _15_
    /// seconds of inactivity or _75_ seconds of activity. If all connections are busy, they are
    /// queued, which is reflected in this metric.
    ///
    /// This metric is tagged with:
    /// - `priority`: The queueing priority of the request, either `"high"` or `"low"`. The
    ///   priority determines precedence in executing requests.
    ///
    /// The number of concurrent connections can be configured with:
    /// - `limits.max_concurrent_requests` for the overall number of connections
    /// - `limits.max_concurrent_queries` for the number of concurrent high-priority requests
    UpstreamMessageQueueSize,
    /// Counts the number of retries for each upstream http request.
    ///
    /// This metric is tagged with:
    ///
    /// - `result`: What happened to the request, an enumeration with the following values:
    ///   * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///   * `response_error`: The request was sent and it returned an HTTP error.
    ///   * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///   * `send_failed`: Failed to send the request due to a network error.
    ///   * `rate_limited`: The request was rate limited.
    ///   * `invalid_json`: The response could not be parsed back into JSON.
    /// - `route`: The endpoint that was called on the upstream.
    /// - `status-code`: The status code of the request when available, otherwise "-".
    UpstreamRetries,
    /// Size of queries (projectconfig queries, i.e. the request payload, not the response) sent by
    /// Relay over HTTP in bytes.
    UpstreamQueryBodySize,
    /// Size of envelopes sent over HTTP in bytes.
    UpstreamEnvelopeBodySize,
    /// Size of batched global metrics requests sent by Relay over HTTP in bytes.
    UpstreamMetricsBodySize,
    /// Distribution of flush buckets over partition keys.
    ///
    /// The distribution of buckets should be even.
    /// If it is not, this metric should expose it.
    PartitionKeys,
    /// Measures how many splits were performed when sending out a partition.
    PartitionSplits,
}

impl DistributionMetric for RelayDistributions {
    fn name(&self) -> &'static str {
        match self {
            Self::EnvelopeItemSize => "event.item_size",
            Self::EventSpans => "event.spans",
            Self::BatchesPerPartition => "metrics.buckets.batches_per_partition",
            Self::BucketsPerBatch => "metrics.buckets.per_batch",
            Self::BufferEnvelopesCount => "buffer.envelopes_count",
            Self::BufferEnvelopeBodySize => "buffer.envelope_body_size",
            Self::BufferEnvelopeSize => "buffer.envelope_size",
            Self::BufferEnvelopeSizeCompressed => "buffer.envelope_size.compressed",
            Self::ProjectStatePending => "project_state.pending",
            Self::ProjectStateAttempts => "project_state.attempts",
            Self::ProjectStateRequestBatchSize => "project_state.request.batch_size",
            Self::ProjectStateReceived => "project_state.received",
            Self::ProjectStateCacheSize => "project_cache.size",
            #[cfg(feature = "processing")]
            Self::ProjectStateSizeBytesCompressed => "project_state.size_bytes.compressed",
            #[cfg(feature = "processing")]
            Self::ProjectStateSizeBytesDecompressed => "project_state.size_bytes.decompressed",
            Self::UpstreamMessageQueueSize => "http_queue.size",
            Self::UpstreamRetries => "upstream.retries",
            Self::UpstreamQueryBodySize => "upstream.query.body_size",
            Self::UpstreamEnvelopeBodySize => "upstream.envelope.body_size",
            Self::UpstreamMetricsBodySize => "upstream.metrics.body_size",
            Self::PartitionKeys => "metrics.buckets.partition_keys",
            Self::PartitionSplits => "partition_splits",
        }
    }
}

/// Timer metrics used by Relay
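///
/// # Example
///
/// A sketch of how a block of work might be timed, assuming the block form of the
/// `relay_statsd::metric!` macro; the body of the block is illustrative:
///
/// ```ignore
/// relay_statsd::metric!(timer(RelayTimers::EventProcessingDeserialize), {
///     // ... deserialize the event here ...
/// });
/// ```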
pub enum RelayTimers {
    /// Time in milliseconds spent deserializing an event from JSON bytes into the native data
    /// structure on which Relay operates.
    EventProcessingDeserialize,
    /// Time in milliseconds spent running normalization on an event. Normalization
    /// happens before envelope filtering and metric extraction.
    EventProcessingNormalization,
    /// Time in milliseconds spent running inbound data filters on an event.
    EventProcessingFiltering,
    /// Time in milliseconds spent checking for organization, project, and DSN rate limits.
    ///
    /// Not all events reach this point. After an event is rate limited for the first time, the rate
    /// limit is cached. Events coming in after this will be discarded earlier in the request queue
    /// and do not reach the processing queue.
    ///
    /// This metric is tagged with:
    /// - `type`: The type of limiter executed, `cached` or `consistent`.
    /// - `unit`: The item/unit of work which is being rate limited, only available for new
    ///   processing pipelines.
    EventProcessingRateLimiting,
    /// Time in milliseconds spent in data scrubbing for the current event. Data scrubbing happens
    /// last before serializing the event back to JSON.
    EventProcessingPii,
    /// Time spent converting the event from its in-memory representation into a JSON string.
    EventProcessingSerialization,
    /// Time used to extract span metrics from an event.
    EventProcessingSpanMetricsExtraction,
    /// Time spent between the start of request handling and processing of the envelope.
    ///
    /// This includes streaming the request body, scheduling overheads, project config fetching,
    /// batched requests and congestions in the internal processor. This does not include delays in
    /// the incoming request (body upload) and skips all envelopes that are fast-rejected.
    EnvelopeWaitTime,
    /// Time in milliseconds spent in synchronous processing of envelopes.
    ///
    /// This timing covers the end-to-end processing in the CPU pool and comprises:
    ///
    /// - `event_processing.deserialize`
    /// - `event_processing.pii`
    /// - `event_processing.serialization`
    ///
    /// With Relay in processing mode, this also includes the following timings:
    ///
    /// - `event_processing.process`
    /// - `event_processing.filtering`
    /// - `event_processing.rate_limiting`
    EnvelopeProcessingTime,
    /// Total time in milliseconds an envelope spends in Relay from the time it is received until it
    /// finishes processing and has been submitted to the upstream.
    EnvelopeTotalTime,
    /// Latency of project config updates until they reach Relay.
    ///
    /// The metric is calculated by using the creation timestamp of the project config
    /// and when Relay updates its local cache with the new project config.
    ///
    /// No metric is emitted when Relay fetches a project config for the first time.
    ///
    /// This metric is tagged with:
    /// - `delay`: Bucketed amount of seconds passed between fetches.
    ProjectCacheUpdateLatency,
    /// Total time spent from starting to fetch a project config update to completing the fetch.
    ProjectCacheFetchDuration,
    /// Total time in milliseconds spent waiting for queued project configuration update requests
    /// to resolve.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries * cache.batch_size` projects from the upstream. This metric
    /// measures the wall clock time for all concurrent requests in this loop.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequestDuration,
    /// Time in milliseconds required to decompress a project config from redis.
    ///
    /// Note that this also times the cases where project config is uncompressed,
    /// in which case the timer should be very close to zero.
    #[cfg(feature = "processing")]
    ProjectStateDecompression,
    /// Total duration in milliseconds for handling inbound web requests until the HTTP response is
    /// returned to the client.
    ///
    /// This does **not** correspond to the full event ingestion time. Requests for events that are
    /// not immediately rejected due to bad data or cached rate limits always return `200 OK`. Full
    /// validation and normalization occur asynchronously, which is reported by
    /// `event.processing_time`.
    ///
    /// This metric is tagged with:
    /// - `method`: The HTTP method of the request.
    /// - `route`: Unique dashed identifier of the endpoint.
    RequestsDuration,
    /// Time spent on minidump scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the minidump. Even if no PII
    /// scrubbing rules apply, the minidump is still parsed and the rules are evaluated on the
    /// parsed minidump; in that case the duration is reported here with a status of "n/a".
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: Scrubbing status: "ok" means successfully scrubbed, "error" means there
    ///   was an error during scrubbing and finally "n/a" means scrubbing was successful
    ///   but no scrubbing rules applied.
    MinidumpScrubbing,
    /// Time spent on view hierarchy scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the view hierarchy json file.
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: "ok" means successfully scrubbed, "error" means there was an error during
    ///   scrubbing.
    ViewHierarchyScrubbing,
    /// Time spent on attachment scrubbing.
    ///
    /// This represents the total time spent on evaluating the scrubbing rules for an
    /// attachment and the attachment scrubbing itself, regardless of whether any rules were
    /// applied. Note that minidumps which failed to be parsed (status="error" in
    /// scrubbing.minidumps.duration) will be scrubbed as plain attachments and count
    /// towards this.
    ///
    /// This metric is tagged with:
    ///
    /// - `attachment_type`: The type of attachment, e.g. "minidump".
    AttachmentScrubbing,
    /// Total time spent to send request to upstream Relay and handle the response.
    ///
    /// This metric is tagged with:
    ///
    /// - `result`: What happened to the request, an enumeration with the following values:
    ///   * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///   * `response_error`: The request was sent and it returned an HTTP error.
    ///   * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///   * `send_failed`: Failed to send the request due to a network error.
    ///   * `rate_limited`: The request was rate limited.
    ///   * `invalid_json`: The response could not be parsed back into JSON.
    /// - `route`: The endpoint that was called on the upstream.
    /// - `status-code`: The status code of the request when available, otherwise "-".
    /// - `retries`: Number of retries, bucketed as 0, 1, 2, few (3 - 10), many (more than 10).
    UpstreamRequestsDuration,
    /// The delay between the timestamp stated in a payload and the receive time.
    ///
    /// SDKs cannot transmit payloads immediately in all cases. Sometimes, crashes require that
    /// events are sent after restarting the application. Similarly, SDKs buffer events during
    /// network downtimes for later transmission. This metric measures the delay between the time of
    /// the event and the time it arrives in Relay. The delay is measured after clock drift
    /// correction is applied.
    ///
    /// Only payloads with a delay of more than 1 minute are captured.
    ///
    /// This metric is tagged with:
    ///
    /// - `category`: The data category of the payload. Can be one of: `event`, `transaction`,
    ///   `security`, or `session`.
    TimestampDelay,
    /// The time it takes the outcome aggregator to flush aggregated outcomes.
    OutcomeAggregatorFlushTime,
    /// Time in milliseconds spent on parsing, normalizing and scrubbing replay recordings.
    ReplayRecordingProcessing,
    /// Total time spent to send a request and receive the response from upstream.
    GlobalConfigRequestDuration,
    /// Timing in milliseconds for processing a message in the internal CPU pool.
    ///
    /// This metric is tagged with:
    ///
    /// - `message`: The type of message that was processed.
    ProcessMessageDuration,
    /// Timing in milliseconds for processing a task in the project cache service.
    ///
    /// This metric is tagged with:
    /// - `task`: The type of the task the project cache does.
    ProjectCacheTaskDuration,
    /// Timing in milliseconds for handling and responding to a health check request.
    ///
    /// This metric is tagged with:
    /// - `type`: The type of the health check, `liveness` or `readiness`.
    HealthCheckDuration,
    /// Temporary timing metric for how much time was spent evaluating span and transaction
    /// rate limits using the `RateLimitBuckets` message in the processor.
    ///
    /// This metric is tagged with:
    /// - `category`: The data category evaluated.
    /// - `limited`: Whether the batch is rate limited.
    /// - `count`: How many items matching the data category are contained in the batch.
    #[cfg(feature = "processing")]
    RateLimitBucketsDuration,
    /// Timing in milliseconds for processing a task in the aggregator service.
    ///
    /// This metric is tagged with:
    /// - `task`: The task being executed by the aggregator.
    /// - `aggregator`: The name of the aggregator.
    AggregatorServiceDuration,
    /// Timing in milliseconds for processing a message in the metric router service.
    ///
    /// This metric is tagged with:
    /// - `message`: The type of message that was processed.
    MetricRouterServiceDuration,
    /// Timing in milliseconds for processing a message in the metric store service.
    ///
    /// This metric is tagged with:
    /// - `message`: The type of message that was processed.
    #[cfg(feature = "processing")]
    StoreServiceDuration,
    /// Timing in milliseconds for the time it takes to initialize the buffer.
    BufferInitialization,
    /// Timing in milliseconds for the time it takes for the buffer to pack & spool a batch.
    ///
    /// Contains the time it takes to pack multiple envelopes into a single memory blob.
    BufferSpool,
    /// Timing in milliseconds for the time it takes for the buffer to spool data to SQLite.
    BufferSqlWrite,
    /// Timing in milliseconds for the time it takes for the buffer to unspool data from disk.
    BufferUnspool,
    /// Timing in milliseconds for the time it takes for the buffer to push.
    BufferPush,
    /// Timing in milliseconds for the time it takes for the buffer to peek.
    BufferPeek,
    /// Timing in milliseconds for the time it takes for the buffer to pop.
    BufferPop,
    /// Timing in milliseconds for the time it takes for the buffer to drain its envelopes.
    BufferDrain,
    /// Timing in milliseconds for the time it takes for an envelope to be serialized.
    BufferEnvelopesSerialization,
    /// Timing in milliseconds for the time it takes for an envelope to be compressed.
    BufferEnvelopeCompression,
    /// Timing in milliseconds for the time it takes for an envelope to be decompressed.
    BufferEnvelopeDecompression,
    /// Timing in milliseconds for the time it takes to read an HTTP body.
    BodyReadDuration,
    /// Timing in milliseconds to count spans in a serialized transaction payload.
    CheckNestedSpans,
    /// The time it takes to create a signature. This covers both the signatures used for
    /// trusted relays and those used for register challenges.
    SignatureCreationDuration,
}

impl TimerMetric for RelayTimers {
    fn name(&self) -> &'static str {
        match self {
            RelayTimers::EventProcessingDeserialize => "event_processing.deserialize",
            RelayTimers::EventProcessingNormalization => "event_processing.normalization",
            RelayTimers::EventProcessingFiltering => "event_processing.filtering",
            RelayTimers::EventProcessingRateLimiting => "event_processing.rate_limiting",
            RelayTimers::EventProcessingPii => "event_processing.pii",
            RelayTimers::EventProcessingSpanMetricsExtraction => {
                "event_processing.span_metrics_extraction"
            }
            RelayTimers::EventProcessingSerialization => "event_processing.serialization",
            RelayTimers::EnvelopeWaitTime => "event.wait_time",
            RelayTimers::EnvelopeProcessingTime => "event.processing_time",
            RelayTimers::EnvelopeTotalTime => "event.total_time",
            RelayTimers::ProjectStateRequestDuration => "project_state.request.duration",
            #[cfg(feature = "processing")]
            RelayTimers::ProjectStateDecompression => "project_state.decompression",
            RelayTimers::ProjectCacheUpdateLatency => "project_cache.latency",
            RelayTimers::ProjectCacheFetchDuration => "project_cache.fetch.duration",
            RelayTimers::RequestsDuration => "requests.duration",
            RelayTimers::MinidumpScrubbing => "scrubbing.minidumps.duration",
            RelayTimers::ViewHierarchyScrubbing => "scrubbing.view_hierarchy_scrubbing.duration",
            RelayTimers::AttachmentScrubbing => "scrubbing.attachments.duration",
            RelayTimers::UpstreamRequestsDuration => "upstream.requests.duration",
            RelayTimers::TimestampDelay => "requests.timestamp_delay",
            RelayTimers::OutcomeAggregatorFlushTime => "outcomes.aggregator.flush_time",
            RelayTimers::ReplayRecordingProcessing => "replay.recording.process",
            RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration",
            RelayTimers::ProcessMessageDuration => "processor.message.duration",
            RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration",
            RelayTimers::HealthCheckDuration => "health.message.duration",
            #[cfg(feature = "processing")]
            RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets",
            RelayTimers::AggregatorServiceDuration => "metrics.aggregator.message.duration",
            RelayTimers::MetricRouterServiceDuration => "metrics.router.message.duration",
            #[cfg(feature = "processing")]
            RelayTimers::StoreServiceDuration => "store.message.duration",
            RelayTimers::BufferInitialization => "buffer.initialization.duration",
            RelayTimers::BufferSpool => "buffer.spool.duration",
            RelayTimers::BufferSqlWrite => "buffer.write.duration",
            RelayTimers::BufferUnspool => "buffer.unspool.duration",
            RelayTimers::BufferPush => "buffer.push.duration",
            RelayTimers::BufferPeek => "buffer.peek.duration",
            RelayTimers::BufferPop => "buffer.pop.duration",
            RelayTimers::BufferDrain => "buffer.drain.duration",
            RelayTimers::BufferEnvelopesSerialization => "buffer.envelopes_serialization",
            RelayTimers::BufferEnvelopeCompression => "buffer.envelopes_compression",
            RelayTimers::BufferEnvelopeDecompression => "buffer.envelopes_decompression",
            RelayTimers::BodyReadDuration => "requests.body_read.duration",
            RelayTimers::CheckNestedSpans => "envelope.check_nested_spans",
            RelayTimers::SignatureCreationDuration => "signature.create.duration",
        }
    }
}

/// Counter metrics used by Relay
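///
/// # Example
///
/// A sketch of how counters might be incremented, with and without a tag, assuming the
/// `relay_statsd::metric!` macro; the tag value shown is illustrative:
///
/// ```ignore
/// relay_statsd::metric!(counter(RelayCounters::EventCorrupted) += 1);
/// relay_statsd::metric!(counter(RelayCounters::SamplingDecision) += 1, item = "span");
/// ```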
pub enum RelayCounters {
    /// Tracks the number of tasks driven to completion by the async pool.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolFinishedTasks,
    /// Number of Events that had corrupted (unprintable) event attributes.
    ///
    /// This currently checks for `environment` and `release`, for which we know that
    /// some SDKs may send corrupted values.
    EventCorrupted,
    /// Number of envelopes accepted in the current time slot.
    ///
    /// This represents requests that have successfully passed rate limits and filters, and have
    /// been sent to the upstream.
    ///
    /// This metric is tagged with:
    /// - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if
    ///   there was an error or bug.
    EnvelopeAccepted,
    /// Number of envelopes rejected in the current time slot.
    ///
    /// This includes envelopes being rejected because they are malformed or any other errors during
    /// processing (including filtered events, invalid payloads, and rate limits).
    ///
    /// To check the rejection reason, check `events.outcomes`, instead.
    ///
    /// This metric is tagged with:
    /// - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if
    ///   there was an error or bug.
    EnvelopeRejected,
    /// Total number of envelope items received.
    ///
    /// Note: This does not count raw items; it counts the logical number of items,
    /// e.g. a single item container counts all of its contained items.
    ///
    /// This metric is tagged with:
    /// - `item_type`: The type of the items being counted.
    /// - `is_container`: Whether this item is a container holding multiple items.
    /// - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    EnvelopeItems,
    /// Number of bytes we processed per envelope item.
    ///
    /// This metric is tagged with:
    /// - `item_type`: The type of the items being counted.
    /// - `is_container`: Whether this item is a container holding multiple items.
    /// - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    EnvelopeItemBytes,
    /// Number of attempts to pop an envelope from the buffer.
    BufferTryPop,
    /// Number of envelopes spooled to disk.
    BufferSpooledEnvelopes,
    /// Number of envelopes unspooled from disk.
    BufferUnspooledEnvelopes,
    /// Number of project changed updates received by the buffer.
    BufferProjectChangedEvent,
    /// Number of times one or more projects of an envelope were pending when trying to pop
    /// their envelope.
    BufferProjectPending,
    /// Number of outcomes and reasons for rejected Envelopes.
    ///
    /// This metric is tagged with:
    /// - `outcome`: The basic cause for rejecting the event.
    /// - `reason`: A more detailed identifier describing the rule or mechanism leading to the
    ///   outcome.
    /// - `to`: Describes the destination of the outcome. Can be either 'kafka' (when in
    ///   processing mode) or 'http' (when outcomes are enabled in an external relay).
    ///
    /// Possible outcomes are:
    /// - `filtered`: Dropped by inbound data filters. The reason specifies the filter that
    ///   matched.
    /// - `rate_limited`: Dropped by organization, project, or DSN rate limit, as well as exceeding
    ///   the Sentry plan quota. The reason contains the rate limit or quota that was exceeded.
    /// - `invalid`: Data was considered invalid and could not be recovered. The reason indicates
    ///   the validation that failed.
    Outcomes,
    /// The number of individual outcomes including their quantity.
    ///
    /// While [`RelayCounters::Outcomes`] tracks the number of times aggregated outcomes
    /// have been emitted, this counter tracks the total quantity of individual outcomes.
    OutcomeQuantity,
    /// Number of project state HTTP requests.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries` batches of `cache.batch_size` projects from the upstream.
    /// The duration of these requests is reported via `project_state.request.duration`.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequest,
    /// Number of times a project state is requested from the central Redis cache.
    ///
    /// This metric is tagged with:
    /// - `hit`: One of:
    ///   - `revision`: the cached version was validated to be up to date using its revision.
    ///   - `project_config`: the request was handled by the cache.
    ///   - `project_config_revision`: the request was handled by the cache and the revision did
    ///     not change.
    ///   - `false`: the request will be sent to the sentry endpoint.
    #[cfg(feature = "processing")]
    ProjectStateRedis,
    /// Number of times a project had a fetch scheduled.
    ProjectCacheSchedule,
    /// Number of times an upstream request for a project config is completed.
    ///
    /// Completion can be because a result was returned or because the config request was
    /// dropped when there was still no response after a timeout. This metric has tags
    /// for `result` and `attempts` indicating whether it was successful or a timeout and
    /// how many attempts were made, respectively.
    ProjectUpstreamCompleted,
    /// Number of times an upstream request for a project config failed.
    ///
    /// Failure can happen, for example, when there's a network error. Refer to
    /// [`UpstreamRequestError`](crate::services::upstream::UpstreamRequestError) for all cases.
    ProjectUpstreamFailed,
    /// Number of Relay server starts.
    ///
    /// This can be used to track unwanted restarts due to crashes or termination.
    ServerStarting,
    /// Number of messages placed on the Kafka queues.
    ///
    /// When Relay operates as a Sentry service and an Envelope item is successfully processed, each
    /// Envelope item results in a dedicated message on one of the ingestion topics on Kafka.
    ///
    /// This metric is tagged with:
    /// - `event_type`: The kind of message produced to Kafka.
    /// - `namespace` (only for metrics): The namespace that the metric belongs to.
    /// - `is_segment` (only for event_type span): `true` if the span is the root of a segment.
    /// - `has_parent` (only for event_type span): `false` if the span is the root of a trace.
    /// - `platform` (only for event_type span): The platform from which the span was sent.
    /// - `metric_type` (only for event_type metric): The metric type, counter, distribution,
    ///   gauge or set.
    /// - `metric_encoding` (only for event_type metric): The encoding used for distribution and
    ///   set metrics.
    ///
    /// The message types can be:
    ///
    /// - `event`: An error or transaction event. Error events are sent to `ingest-events`,
    ///   transactions to `ingest-transactions`, and errors with attachments are sent to
    ///   `ingest-attachments`.
    /// - `attachment`: An attachment file associated with an error event, sent to
    ///   `ingest-attachments`.
    /// - `user_report`: A message from the user feedback dialog, sent to `ingest-events`.
    /// - `session`: A release health session update, sent to `ingest-sessions`.
    #[cfg(feature = "processing")]
    ProcessingMessageProduced,
    /// Number of spans produced in the new format.
    #[cfg(feature = "processing")]
    SpanV2Produced,
    /// Number of events that hit any of the store-like endpoints: Envelope, Store, Security,
    /// Minidump, Unreal.
    ///
    /// The events are counted before they are rate limited, filtered, or processed in any way.
    ///
    /// This metric is tagged with:
    /// - `version`: The event protocol version number defaulting to `7`.
    EventProtocol,
    /// The number of transaction events processed by the source of the transaction name.
    ///
    /// This metric is tagged with:
    /// - `platform`: The event's platform, such as `"javascript"`.
    /// - `source`: The source of the transaction name on the client. See the [transaction source
    ///   documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///   for all valid values.
    /// - `contains_slashes`: Whether the transaction name contains `/`. We use this as a heuristic
    ///   to represent URL transactions.
    EventTransaction,
    /// The number of transaction events processed grouped by transaction name modifications.
    ///
    /// This metric is tagged with:
    /// - `source_in`: The source of the transaction name before normalization.
    ///   See the [transaction source
    ///   documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///   for all valid values.
    /// - `change`: The mechanism that changed the transaction name.
    ///   Either `"none"`, `"pattern"`, `"rule"`, or `"both"`.
    /// - `source_out`: The source of the transaction name after normalization.
    TransactionNameChanges,
    /// Number of HTTP requests reaching Relay.
    Requests,
    /// Number of completed HTTP requests.
    ///
    /// This metric is tagged with:
    ///
    /// - `status_code`: The HTTP status code number.
    /// - `method`: The HTTP method used in the request in uppercase.
    /// - `route`: Unique dashed identifier of the endpoint.
    ResponsesStatusCodes,
    /// Number of evicted stale projects from the cache.
    ///
    /// Relay scans the in-memory project cache for stale entries in a regular interval configured
    /// by `cache.eviction_interval`.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    /// - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///   automatically refreshed if a request references the project after it has expired.
    /// - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///   be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///   requests wait for an update.
    EvictingStaleProjectCaches,
    /// Number of refreshes for stale projects in the cache.
    RefreshStaleProjectCaches,
    /// Number of times that parsing a metrics bucket item from an envelope failed.
    MetricBucketsParsingFailed,
    /// Counts the extraction of transaction names. Tagged with the decision to drop / replace /
    /// use the original.
    MetricsTransactionNameExtracted,
    /// Number of Events with an OpenTelemetry Context.
    ///
    /// This metric is tagged with:
    /// - `platform`: The event's platform, such as `"javascript"`.
    /// - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    OpenTelemetryEvent,
    /// Number of global config fetches from upstream. Only 2XX responses are
    /// considered; send errors (e.g. auth or network errors) are ignored.
    ///
    /// This metric is tagged with:
    /// - `success`: whether deserializing the global config succeeded.
    GlobalConfigFetched,
    /// The number of attachments processed in the same envelope as a user_report_v2 event.
    FeedbackAttachments,
    /// All COGS tracked values.
    ///
    /// This metric is tagged with:
    /// - `resource_id`: The COGS resource id.
    /// - `app_feature`: The COGS app feature.
    CogsUsage,
    /// The number of times metrics of a project have been flushed without the project being
    /// fetched/available.
    ProjectStateFlushMetricsNoProject,
    /// Incremented every time a bucket is dropped.
    ///
    /// This should only happen when a project state is invalid during graceful shutdown.
    ///
    /// This metric is tagged with:
    /// - `aggregator`: The name of the metrics aggregator (usually `"default"`).
    BucketsDropped,
    /// Incremented every time a segment exceeds the expected limit.
    ReplayExceededSegmentLimit,
    /// Incremented every time the server accepts a new connection.
    ServerSocketAccept,
    /// Incremented every time the server aborts a connection because of an idle timeout.
    ServerConnectionIdleTimeout,
    /// The total delay of metric buckets in seconds.
    ///
    /// The delay is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// Use [`Self::MetricDelayCount`] to calculate the average delay.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
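    ///
    /// For example, the average bucket delay can be derived as `metrics.delay.sum / metrics.delay.count`.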
    #[cfg(feature = "processing")]
    MetricDelaySum,
    /// The number of buckets counted for the [`Self::MetricDelaySum`] metric.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayCount,
    /// The number of times PlayStation processing was attempted.
    #[cfg(all(sentry, feature = "processing"))]
    PlaystationProcessing,
    /// The number of times a sampling decision was made.
    ///
    /// This metric is tagged with:
    /// - `item`: what item the decision is taken for (transaction vs span).
    SamplingDecision,
    /// The number of times an upload of an attachment occurs.
    ///
    /// This metric is tagged with:
    /// - `result`: `success` or the failure reason.
    /// - `type`: `envelope` or `attachment_v2`.
    #[cfg(feature = "processing")]
    AttachmentUpload,
    /// Whether a logs envelope has a trace context header or not.
    ///
    /// This metric is tagged with:
    /// - `dsc`: yes or no.
    EnvelopeWithLogs,
}

impl CounterMetric for RelayCounters {
    fn name(&self) -> &'static str {
        match self {
            RelayCounters::AsyncPoolFinishedTasks => "async_pool.finished_tasks",
            RelayCounters::EventCorrupted => "event.corrupted",
            RelayCounters::EnvelopeAccepted => "event.accepted",
            RelayCounters::EnvelopeRejected => "event.rejected",
            RelayCounters::EnvelopeItems => "event.items",
            RelayCounters::EnvelopeItemBytes => "event.item_bytes",
            RelayCounters::BufferTryPop => "buffer.try_pop",
            RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes",
            RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes",
            RelayCounters::BufferProjectChangedEvent => "buffer.project_changed_event",
            RelayCounters::BufferProjectPending => "buffer.project_pending",
            RelayCounters::Outcomes => "events.outcomes",
            RelayCounters::OutcomeQuantity => "events.outcome_quantity",
            RelayCounters::ProjectStateRequest => "project_state.request",
            #[cfg(feature = "processing")]
            RelayCounters::ProjectStateRedis => "project_state.redis.requests",
            RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed",
            RelayCounters::ProjectUpstreamFailed => "project_upstream.failed",
            RelayCounters::ProjectCacheSchedule => "project_cache.schedule",
            RelayCounters::ServerStarting => "server.starting",
            #[cfg(feature = "processing")]
            RelayCounters::ProcessingMessageProduced => "processing.event.produced",
            #[cfg(feature = "processing")]
            RelayCounters::SpanV2Produced => "store.produced.span_v2",
            RelayCounters::EventProtocol => "event.protocol",
            RelayCounters::EventTransaction => "event.transaction",
            RelayCounters::TransactionNameChanges => "event.transaction_name_changes",
            RelayCounters::Requests => "requests",
            RelayCounters::ResponsesStatusCodes => "responses.status_codes",
            RelayCounters::EvictingStaleProjectCaches => "project_cache.eviction",
            RelayCounters::RefreshStaleProjectCaches => "project_cache.refresh",
            RelayCounters::MetricBucketsParsingFailed => "metrics.buckets.parsing_failed",
            RelayCounters::MetricsTransactionNameExtracted => "metrics.transaction_name",
            RelayCounters::OpenTelemetryEvent => "event.opentelemetry",
            RelayCounters::GlobalConfigFetched => "global_config.fetch",
            RelayCounters::FeedbackAttachments => "processing.feedback_attachments",
            RelayCounters::CogsUsage => "cogs.usage",
            RelayCounters::ProjectStateFlushMetricsNoProject => "project_state.metrics.no_project",
            RelayCounters::BucketsDropped => "metrics.buckets.dropped",
            RelayCounters::ReplayExceededSegmentLimit => "replay.segment_limit_exceeded",
            RelayCounters::ServerSocketAccept => "server.http.accepted",
            RelayCounters::ServerConnectionIdleTimeout => "server.http.idle_timeout",
            #[cfg(feature = "processing")]
            RelayCounters::MetricDelaySum => "metrics.delay.sum",
            #[cfg(feature = "processing")]
            RelayCounters::MetricDelayCount => "metrics.delay.count",
            #[cfg(all(sentry, feature = "processing"))]
            RelayCounters::PlaystationProcessing => "processing.playstation",
            RelayCounters::SamplingDecision => "sampling.decision",
            #[cfg(feature = "processing")]
            RelayCounters::AttachmentUpload => "attachment.upload",
            RelayCounters::EnvelopeWithLogs => "logs.envelope",
        }
    }
}
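
// Added illustration: a small test module exercising only definitions from this file. It shows
// how the `name()` implementations above map enum variants to the dotted statsd names that
// Relay emits; the chosen variants are arbitrary examples.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn metric_names_resolve() {
        assert_eq!(RelayGauges::NetworkOutage.name(), "upstream.network_outage");
        assert_eq!(RelayDistributions::EventSpans.name(), "event.spans");
        assert_eq!(RelayTimers::EnvelopeTotalTime.name(), "event.total_time");
        assert_eq!(RelayCounters::Outcomes.name(), "events.outcomes");
    }
}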